diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -538,8 +538,12 @@ if (NextJTAddress) UpperBound = std::min(NextJTAddress, UpperBound); - LLVM_DEBUG(dbgs() << "BOLT-DEBUG: analyzeJumpTable in " << BF.getPrintName() - << '\n'); + LLVM_DEBUG({ + using JTT = JumpTable::JumpTableType; + dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n", + Address, BF.getPrintName(), + Type == JTT::JTT_PIC ? "PIC" : "Normal"); + }); const uint64_t EntrySize = getJumpTableEntrySize(Type); for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize; EntryAddress += EntrySize) { @@ -570,7 +574,7 @@ if (Value == BF.getAddress() + BF.getSize()) { addEntryAddress(Value); HasUnreachable = true; - LLVM_DEBUG(dbgs() << "OK: __builtin_unreachable\n"); + LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value)); continue; } @@ -585,12 +589,12 @@ if (TargetBF) { dbgs() << " ! function containing this address: " << TargetBF->getPrintName() << '\n'; - if (TargetBF->isFragment()) - dbgs() << " ! is a fragment\n"; - for (BinaryFunction *TargetParent : TargetBF->ParentFragments) - dbgs() << " ! its parent is " - << (TargetParent ? TargetParent->getPrintName() : "(none)") - << '\n'; + if (TargetBF->isFragment()) { + dbgs() << " ! is a fragment"; + for (BinaryFunction *Parent : TargetBF->ParentFragments) + dbgs() << ", parent: " << Parent->getPrintName(); + dbgs() << '\n'; + } } } if (Value == BF.getAddress()) @@ -602,11 +606,12 @@ // Check there's an instruction at this offset. 
if (TargetBF->getState() == BinaryFunction::State::Disassembled && !TargetBF->getInstructionAtOffset(Value - TargetBF->getAddress())) { - LLVM_DEBUG(dbgs() << "FAIL: no instruction at this offset\n"); + LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value)); break; } ++NumRealEntries; + LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value)); if (TargetBF != &BF) BF.setHasIndirectTargetToSplitFragment(true); diff --git a/clang-tools-extra/clang-doc/Serialize.cpp b/clang-tools-extra/clang-doc/Serialize.cpp --- a/clang-tools-extra/clang-doc/Serialize.cpp +++ b/clang-tools-extra/clang-doc/Serialize.cpp @@ -168,7 +168,7 @@ } bool ClangDocCommentVisitor::isWhitespaceOnly(llvm::StringRef S) const { - return std::all_of(S.begin(), S.end(), isspace); + return llvm::all_of(S, isspace); } std::string ClangDocCommentVisitor::getCommandName(unsigned CommandID) const { diff --git a/clang-tools-extra/clang-move/Move.cpp b/clang-tools-extra/clang-move/Move.cpp --- a/clang-tools-extra/clang-move/Move.cpp +++ b/clang-tools-extra/clang-move/Move.cpp @@ -920,8 +920,7 @@ return false; } }; - if (std::none_of(UnremovedDeclsInOldHeader.begin(), - UnremovedDeclsInOldHeader.end(), IsSupportedKind) && + if (llvm::none_of(UnremovedDeclsInOldHeader, IsSupportedKind) && !Context->Spec.OldHeader.empty()) { auto &SM = RemovedDecls[0]->getASTContext().getSourceManager(); moveAll(SM, Context->Spec.OldHeader, Context->Spec.NewHeader); diff --git a/clang-tools-extra/clang-tidy/objc/NSDateFormatterCheck.cpp b/clang-tools-extra/clang-tidy/objc/NSDateFormatterCheck.cpp --- a/clang-tools-extra/clang-tidy/objc/NSDateFormatterCheck.cpp +++ b/clang-tools-extra/clang-tidy/objc/NSDateFormatterCheck.cpp @@ -38,14 +38,10 @@ // set of reserved characters. 
See: // https://www.unicode.org/reports/tr35/tr35.html#Invalid_Patterns bool isValidDatePattern(StringRef Pattern) { - for (auto &PatternChar : Pattern) { - if (isalpha(PatternChar)) { - if (!llvm::is_contained(ValidDatePatternChars, PatternChar)) { - return false; - } - } - } - return true; + return llvm::all_of(Pattern, [](const auto &PatternChar) { + return !isalpha(PatternChar) || + llvm::is_contained(ValidDatePatternChars, PatternChar); + }); } // Checks if the string pattern used as a date format specifier contains diff --git a/clang-tools-extra/clangd/URI.cpp b/clang-tools-extra/clangd/URI.cpp --- a/clang-tools-extra/clangd/URI.cpp +++ b/clang-tools-extra/clangd/URI.cpp @@ -142,7 +142,7 @@ return false; if (!llvm::isAlpha(Scheme[0])) return false; - return std::all_of(Scheme.begin() + 1, Scheme.end(), [](char C) { + return llvm::all_of(llvm::drop_begin(Scheme), [](char C) { return llvm::isAlnum(C) || C == '+' || C == '.' || C == '-'; }); } diff --git a/clang-tools-extra/clangd/index/CanonicalIncludes.cpp b/clang-tools-extra/clangd/index/CanonicalIncludes.cpp --- a/clang-tools-extra/clangd/index/CanonicalIncludes.cpp +++ b/clang-tools-extra/clangd/index/CanonicalIncludes.cpp @@ -777,12 +777,11 @@ llvm::sys::path::end(Path)) <= MaxSuffixComponents; })); // ... and precise. - assert(llvm::find_if(SystemHeaderMap->keys(), [](llvm::StringRef Path) { - return std::distance(llvm::sys::path::begin( - Path, llvm::sys::path::Style::posix), - llvm::sys::path::end(Path)) == - MaxSuffixComponents; - }) != SystemHeaderMap->keys().end()); + assert(llvm::any_of(SystemHeaderMap->keys(), [](llvm::StringRef Path) { + return std::distance( + llvm::sys::path::begin(Path, llvm::sys::path::Style::posix), + llvm::sys::path::end(Path)) == MaxSuffixComponents; + })); // FIXME: Suffix mapping contains invalid entries for C, so only enable it for // CPP. 
diff --git a/clang-tools-extra/clangd/refactor/Rename.cpp b/clang-tools-extra/clangd/refactor/Rename.cpp --- a/clang-tools-extra/clangd/refactor/Rename.cpp +++ b/clang-tools-extra/clangd/refactor/Rename.cpp @@ -757,12 +757,12 @@ return StartOffset.takeError(); if (!EndOffset) return EndOffset.takeError(); - if (llvm::find_if( + if (llvm::none_of( *MainFileRenameEdit, [&StartOffset, &EndOffset](const clang::tooling::Replacement &R) { return R.getOffset() == *StartOffset && R.getLength() == *EndOffset - *StartOffset; - }) == MainFileRenameEdit->end()) { + })) { return makeError(ReasonToReject::NoSymbolFound); } RenameResult Result; diff --git a/clang-tools-extra/pseudo/lib/cxx/cxx.bnf b/clang-tools-extra/pseudo/lib/cxx/cxx.bnf --- a/clang-tools-extra/pseudo/lib/cxx/cxx.bnf +++ b/clang-tools-extra/pseudo/lib/cxx/cxx.bnf @@ -375,18 +375,19 @@ simple-type-specifier := decltype-specifier simple-type-specifier := placeholder-type-specifier simple-type-specifier := nested-name-specifier_opt template-name +simple-type-specifier := SHORT +simple-type-specifier := LONG +simple-type-specifier := SIGNED +simple-type-specifier := UNSIGNED simple-type-specifier := builtin-type +#! builtin-type added to aid in classifying which specifiers may combined. builtin-type := CHAR builtin-type := CHAR8_T builtin-type := CHAR16_T builtin-type := CHAR32_T builtin-type := WCHAR_T builtin-type := BOOL -simple-type-specifier := SHORT builtin-type := INT -simple-type-specifier := LONG -simple-type-specifier := SIGNED -simple-type-specifier := UNSIGNED builtin-type := FLOAT builtin-type := DOUBLE builtin-type := VOID diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -71,6 +71,10 @@ - Fix `#57008 `_ - Builtin C++ language extension type traits instantiated by a template with unexpected number of arguments cause an assertion fault. +- Fix multi-level pack expansion of undeclared function parameters. 
+ This fixes `Issue 56094 `_. +- Fix `#57151 `_. + ``-Wcomma`` is emitted for void returning functions. Improvements to Clang's diagnostics ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -3061,10 +3061,6 @@ PREARGS_START + getNumPreArgs() + getNumArgs()); } - /// getNumCommas - Return the number of commas that must have been present in - /// this function call. - unsigned getNumCommas() const { return getNumArgs() ? getNumArgs() - 1 : 0; } - /// Get FPOptionsOverride from trailing storage. FPOptionsOverride getStoredFPFeatures() const { assert(hasStoredFPFeatures()); diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h --- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h @@ -348,10 +348,12 @@ /// Returns the `DeclContext` of the block being analysed, if any. Otherwise, /// returns null. - const DeclContext *getDeclCtx() { return DeclCtx; } + const DeclContext *getDeclCtx() { return CallStack.back(); } - /// Sets the `DeclContext` of the block being analysed. - void setDeclCtx(const DeclContext *Ctx) { DeclCtx = Ctx; } + /// Returns whether this `Environment` can be extended to analyze the given + /// `Callee` (i.e. if `pushCall` can be used), with recursion disallowed and a + /// given `MaxDepth`. + bool canDescend(unsigned MaxDepth, const DeclContext *Callee) const; /// Returns the `ControlFlowContext` registered for `F`, if any. Otherwise, /// returns null. @@ -390,7 +392,7 @@ DataflowAnalysisContext *DACtx; // `DeclContext` of the block being analysed if provided. 
- const DeclContext *DeclCtx = nullptr; + std::vector CallStack; // In a properly initialized `Environment`, `ReturnLoc` should only be null if // its `DeclContext` could not be cast to a `FunctionDecl`. diff --git a/clang/include/clang/Analysis/FlowSensitive/Transfer.h b/clang/include/clang/Analysis/FlowSensitive/Transfer.h --- a/clang/include/clang/Analysis/FlowSensitive/Transfer.h +++ b/clang/include/clang/Analysis/FlowSensitive/Transfer.h @@ -21,7 +21,11 @@ namespace clang { namespace dataflow { -struct ContextSensitiveOptions {}; +struct ContextSensitiveOptions { + /// The maximum depth to analyze. A value of zero is equivalent to disabling + /// context-sensitive analysis entirely. + unsigned Depth = 2; +}; struct TransferOptions { /// Options for analyzing function bodies when present in the translation diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -127,6 +127,10 @@ ``Obj2`` will be initialized *before* ``Obj1`` despite the usual order of initialization being the opposite. +On Windows, ``init_seg(compiler)`` is represented with a priority of 200 and +``init_seg(library)`` is represented with a priority of 400. ``init_seg(user)`` +uses the default 65535 priority. + This attribute is only supported for C++ and Objective-C++ and is ignored in other language modes. Currently, this attribute is not implemented on z/OS. }]; diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h @@ -23,6 +23,10 @@ namespace tooling { namespace dependencies { +/// A callback to lookup module outputs for "-fmodule-file=", "-o" etc. 
+using LookupModuleOutputCallback = + llvm::function_ref; + /// The full dependencies and module graph for a specific input. struct FullDependencies { /// The identifier of the C++20 module this translation unit exports. @@ -45,17 +49,8 @@ /// determined that the differences are benign for this compilation. std::vector ClangModuleDeps; - /// The original command line of the TU (excluding the compiler executable). - std::vector OriginalCommandLine; - - /// Get the full command line. - /// - /// \param LookupModuleOutput This function is called to fill in - /// "-fmodule-file=", "-o" and other output - /// arguments for dependencies. - std::vector getCommandLine( - llvm::function_ref - LookupModuleOutput) const; + /// The command line of the TU (excluding the compiler executable). + std::vector CommandLine; }; struct FullDependenciesResult { @@ -92,12 +87,16 @@ /// function for a single \c DependencyScanningTool in a /// single build. Use a different one for different tools, /// and clear it between builds. + /// \param LookupModuleOutput This function is called to fill in + /// "-fmodule-file=", "-o" and other output + /// arguments for dependencies. /// /// \returns a \c StringError with the diagnostic output if clang errors /// occurred, \c FullDependencies otherwise. 
llvm::Expected getFullDependencies(const std::vector &CommandLine, StringRef CWD, const llvm::StringSet<> &AlreadySeen, + LookupModuleOutputCallback LookupModuleOutput, llvm::Optional ModuleName = None); private: @@ -106,8 +105,9 @@ class FullDependencyConsumer : public DependencyConsumer { public: - FullDependencyConsumer(const llvm::StringSet<> &AlreadySeen) - : AlreadySeen(AlreadySeen) {} + FullDependencyConsumer(const llvm::StringSet<> &AlreadySeen, + LookupModuleOutputCallback LookupModuleOutput) + : AlreadySeen(AlreadySeen), LookupModuleOutput(LookupModuleOutput) {} void handleDependencyOutputOpts(const DependencyOutputOptions &) override {} @@ -127,6 +127,11 @@ ContextHash = std::move(Hash); } + std::string lookupModuleOutput(const ModuleID &ID, + ModuleOutputKind Kind) override { + return LookupModuleOutput(ID, Kind); + } + FullDependenciesResult getFullDependencies( const std::vector &OriginalCommandLine) const; @@ -138,6 +143,7 @@ std::string ContextHash; std::vector OutputPaths; const llvm::StringSet<> &AlreadySeen; + LookupModuleOutputCallback LookupModuleOutput; }; } // end namespace dependencies diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h @@ -42,6 +42,9 @@ virtual void handleModuleDependency(ModuleDeps MD) = 0; virtual void handleContextHash(std::string Hash) = 0; + + virtual std::string lookupModuleOutput(const ModuleID &ID, + ModuleOutputKind Kind) = 0; }; /// An individual dependency scanning worker that is able to run on its own diff --git a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h b/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h --- a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h +++ 
b/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h @@ -119,25 +119,11 @@ // the primary TU. bool ImportedByMainFile = false; - /// Whether the TU had a dependency file. The path in \c BuildInvocation is - /// cleared to avoid leaking the specific path from the TU into the module. - bool HadDependencyFile = false; - - /// Whether the TU had serialized diagnostics. The path in \c BuildInvocation - /// is cleared to avoid leaking the specific path from the TU into the module. - bool HadSerializedDiagnostics = false; - /// Compiler invocation that can be used to build this module (without paths). CompilerInvocation BuildInvocation; /// Gets the canonical command line suitable for passing to clang. - /// - /// \param LookupModuleOutput This function is called to fill in - /// "-fmodule-file=", "-o" and other output - /// arguments. - std::vector getCanonicalCommandLine( - llvm::function_ref - LookupModuleOutput) const; + std::vector getCanonicalCommandLine() const; }; class ModuleDepCollector; @@ -237,9 +223,12 @@ /// Constructs a CompilerInvocation that can be used to build the given /// module, excluding paths to discovered modular dependencies that are yet to /// be built. - CompilerInvocation makeInvocationForModuleBuildWithoutPaths( + CompilerInvocation makeInvocationForModuleBuildWithoutOutputs( const ModuleDeps &Deps, llvm::function_ref Optimize) const; + + /// Add paths that require looking up outputs to the given dependencies. 
+ void addOutputPaths(ModuleDeps &Deps); }; } // end namespace dependencies diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -3486,8 +3486,8 @@ bool FunctionDecl::hasOneParamOrDefaultArgs() const { return getNumParams() == 1 || (getNumParams() > 1 && - std::all_of(param_begin() + 1, param_end(), - [](ParmVarDecl *P) { return P->hasDefaultArg(); })); + llvm::all_of(llvm::drop_begin(parameters()), + [](ParmVarDecl *P) { return P->hasDefaultArg(); })); } /// The combination of the extern and inline keywords under MSVC forces diff --git a/clang/lib/Analysis/CFG.cpp b/clang/lib/Analysis/CFG.cpp --- a/clang/lib/Analysis/CFG.cpp +++ b/clang/lib/Analysis/CFG.cpp @@ -1891,7 +1891,7 @@ // (which is different from the current class) is responsible for // destroying them. const CXXRecordDecl *CD = VI.getType()->getAsCXXRecordDecl(); - if (!CD->hasTrivialDestructor()) { + if (CD && !CD->hasTrivialDestructor()) { autoCreateBlock(); appendBaseDtor(Block, &VI); } @@ -1901,7 +1901,7 @@ for (const auto &BI : RD->bases()) { if (!BI.isVirtual()) { const CXXRecordDecl *CD = BI.getType()->getAsCXXRecordDecl(); - if (!CD->hasTrivialDestructor()) { + if (CD && !CD->hasTrivialDestructor()) { autoCreateBlock(); appendBaseDtor(Block, &BI); } diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp --- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp @@ -20,6 +20,7 @@ #include "clang/Analysis/FlowSensitive/Value.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include @@ -154,10 +155,10 @@ : DACtx(&DACtx), FlowConditionToken(&DACtx.makeFlowConditionToken()) {} Environment::Environment(const Environment &Other) - : DACtx(Other.DACtx), DeclCtx(Other.DeclCtx), 
ReturnLoc(Other.ReturnLoc), - ThisPointeeLoc(Other.ThisPointeeLoc), DeclToLoc(Other.DeclToLoc), - ExprToLoc(Other.ExprToLoc), LocToVal(Other.LocToVal), - MemberLocToStruct(Other.MemberLocToStruct), + : DACtx(Other.DACtx), CallStack(Other.CallStack), + ReturnLoc(Other.ReturnLoc), ThisPointeeLoc(Other.ThisPointeeLoc), + DeclToLoc(Other.DeclToLoc), ExprToLoc(Other.ExprToLoc), + LocToVal(Other.LocToVal), MemberLocToStruct(Other.MemberLocToStruct), FlowConditionToken(&DACtx->forkFlowCondition(*Other.FlowConditionToken)) { } @@ -168,11 +169,11 @@ } Environment::Environment(DataflowAnalysisContext &DACtx, - const DeclContext &DeclCtxArg) + const DeclContext &DeclCtx) : Environment(DACtx) { - setDeclCtx(&DeclCtxArg); + CallStack.push_back(&DeclCtx); - if (const auto *FuncDecl = dyn_cast(DeclCtx)) { + if (const auto *FuncDecl = dyn_cast(&DeclCtx)) { assert(FuncDecl->getBody() != nullptr); initGlobalVars(*FuncDecl->getBody(), *this); for (const auto *ParamDecl : FuncDecl->parameters()) { @@ -187,7 +188,7 @@ ReturnLoc = &createStorageLocation(ReturnType); } - if (const auto *MethodDecl = dyn_cast(DeclCtx)) { + if (const auto *MethodDecl = dyn_cast(&DeclCtx)) { auto *Parent = MethodDecl->getParent(); assert(Parent != nullptr); if (Parent->isLambda()) @@ -205,6 +206,11 @@ } } +bool Environment::canDescend(unsigned MaxDepth, + const DeclContext *Callee) const { + return CallStack.size() <= MaxDepth && !llvm::is_contained(CallStack, Callee); +} + Environment Environment::pushCall(const CallExpr *Call) const { Environment Env(*this); @@ -239,7 +245,7 @@ void Environment::pushCallInternal(const FunctionDecl *FuncDecl, ArrayRef Args) { - setDeclCtx(FuncDecl); + CallStack.push_back(FuncDecl); // FIXME: In order to allow the callee to reference globals, we probably need // to call `initGlobalVars` here in some way. 
@@ -326,13 +332,13 @@ assert(DACtx == Other.DACtx); assert(ReturnLoc == Other.ReturnLoc); assert(ThisPointeeLoc == Other.ThisPointeeLoc); - assert(DeclCtx == Other.DeclCtx); + assert(CallStack == Other.CallStack); auto Effect = LatticeJoinEffect::Unchanged; Environment JoinedEnv(*DACtx); - JoinedEnv.setDeclCtx(DeclCtx); + JoinedEnv.CallStack = CallStack; JoinedEnv.ReturnLoc = ReturnLoc; JoinedEnv.ThisPointeeLoc = ThisPointeeLoc; diff --git a/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/clang/lib/Analysis/FlowSensitive/Transfer.cpp --- a/clang/lib/Analysis/FlowSensitive/Transfer.cpp +++ b/clang/lib/Analysis/FlowSensitive/Transfer.cpp @@ -661,7 +661,8 @@ // `F` of `S`. The type `E` must be either `CallExpr` or `CXXConstructExpr`. template void transferInlineCall(const E *S, const FunctionDecl *F) { - if (!Options.ContextSensitiveOpts) + if (!(Options.ContextSensitiveOpts && + Env.canDescend(Options.ContextSensitiveOpts->Depth, F))) return; const ControlFlowContext *CFCtx = Env.getControlFlowContext(F); @@ -689,7 +690,7 @@ assert(CFCtx->getDecl() != nullptr && "ControlFlowContexts in the environment should always carry a decl"); auto Analysis = NoopAnalysis(CFCtx->getDecl()->getASTContext(), - DataflowAnalysisOptions()); + DataflowAnalysisOptions{Options}); auto BlockToOutputState = dataflow::runDataflowAnalysis(*CFCtx, Analysis, CalleeEnv); diff --git a/clang/lib/Analysis/ReachableCode.cpp b/clang/lib/Analysis/ReachableCode.cpp --- a/clang/lib/Analysis/ReachableCode.cpp +++ b/clang/lib/Analysis/ReachableCode.cpp @@ -299,6 +299,12 @@ if (isa(Term)) { return isConfigurationValue(Term, PP); } + // Do not treat constexpr if statement successors as unreachable in warnings + // since the point of these statements is to determine branches at compile + // time. 
+ if (const auto *IS = dyn_cast(Term); + IS != nullptr && IS->isConstexpr()) + return true; } const Stmt *Cond = B->getTerminatorCondition(/* stripParens */ false); diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h --- a/clang/lib/CodeGen/CGDebugInfo.h +++ b/clang/lib/CodeGen/CGDebugInfo.h @@ -177,7 +177,6 @@ /// ivars and property accessors. llvm::DIType *CreateType(const BuiltinType *Ty); llvm::DIType *CreateType(const ComplexType *Ty); - llvm::DIType *CreateType(const AutoType *Ty); llvm::DIType *CreateType(const BitIntType *Ty); llvm::DIType *CreateQualifiedType(QualType Ty, llvm::DIFile *Fg); llvm::DIType *CreateQualifiedType(const FunctionProtoType *Ty, @@ -231,10 +230,10 @@ /// not updated to include implicit \c this pointer. Use this routine /// to get a method type which includes \c this pointer. llvm::DISubroutineType *getOrCreateMethodType(const CXXMethodDecl *Method, - llvm::DIFile *F, bool decl); + llvm::DIFile *F); llvm::DISubroutineType * getOrCreateInstanceMethodType(QualType ThisPtr, const FunctionProtoType *Func, - llvm::DIFile *Unit, bool decl); + llvm::DIFile *Unit); llvm::DISubroutineType * getOrCreateFunctionType(const Decl *D, QualType FnType, llvm::DIFile *F); /// \return debug info descriptor for vtable. diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -883,10 +883,6 @@ return DBuilder.createBasicType(BTName, Size, Encoding); } -llvm::DIType *CGDebugInfo::CreateType(const AutoType *Ty) { - return DBuilder.createUnspecifiedType("auto"); -} - llvm::DIType *CGDebugInfo::CreateType(const BitIntType *Ty) { StringRef Name = Ty->isUnsigned() ? 
"unsigned _BitInt" : "_BitInt"; @@ -1647,18 +1643,16 @@ llvm::DISubroutineType * CGDebugInfo::getOrCreateMethodType(const CXXMethodDecl *Method, - llvm::DIFile *Unit, bool decl) { - const auto *Func = Method->getType()->castAs(); + llvm::DIFile *Unit) { + const FunctionProtoType *Func = Method->getType()->getAs(); if (Method->isStatic()) return cast_or_null( getOrCreateType(QualType(Func, 0), Unit)); - return getOrCreateInstanceMethodType(Method->getThisType(), Func, Unit, decl); + return getOrCreateInstanceMethodType(Method->getThisType(), Func, Unit); } -llvm::DISubroutineType * -CGDebugInfo::getOrCreateInstanceMethodType(QualType ThisPtr, - const FunctionProtoType *Func, - llvm::DIFile *Unit, bool decl) { +llvm::DISubroutineType *CGDebugInfo::getOrCreateInstanceMethodType( + QualType ThisPtr, const FunctionProtoType *Func, llvm::DIFile *Unit) { FunctionProtoType::ExtProtoInfo EPI = Func->getExtProtoInfo(); Qualifiers &Qc = EPI.TypeQuals; Qc.removeConst(); @@ -1681,20 +1675,9 @@ assert(Args.size() && "Invalid number of arguments!"); SmallVector Elts; + // First element is always return type. For 'void' functions it is NULL. - QualType temp = Func->getReturnType(); - if (temp->getTypeClass() == Type::Auto && decl) { - const AutoType *AT = cast(temp); - - // It may be tricky in some cases to link the specification back the lambda - // call operator and so we skip emitting "auto" for lambdas. This is - // consistent with gcc as well. - if (AT->isDeduced() && ThisPtr->getPointeeCXXRecordDecl()->isLambda()) - Elts.push_back(getOrCreateType(AT->getDeducedType(), Unit)); - else - Elts.push_back(CreateType(AT)); - } else - Elts.push_back(Args[0]); + Elts.push_back(Args[0]); // "this" pointer is always first argument. 
const CXXRecordDecl *RD = ThisPtr->getPointeeCXXRecordDecl(); @@ -1747,7 +1730,7 @@ isa(Method) || isa(Method); StringRef MethodName = getFunctionName(Method); - llvm::DISubroutineType *MethodTy = getOrCreateMethodType(Method, Unit, true); + llvm::DISubroutineType *MethodTy = getOrCreateMethodType(Method, Unit); // Since a single ctor/dtor corresponds to multiple functions, it doesn't // make sense to give a single ctor/dtor a linkage name. @@ -3160,7 +3143,7 @@ return DBuilder.createMemberPointerType( getOrCreateInstanceMethodType( CXXMethodDecl::getThisType(FPT, Ty->getMostRecentCXXRecordDecl()), - FPT, U, false), + FPT, U), ClassType, Size, /*Align=*/0, Flags); } @@ -3971,7 +3954,7 @@ return DBuilder.createSubroutineType(DBuilder.getOrCreateTypeArray(None)); if (const auto *Method = dyn_cast(D)) - return getOrCreateMethodType(Method, F, false); + return getOrCreateMethodType(Method, F); const auto *FTy = FnType->getAs(); CallingConv CC = FTy ? FTy->getCallConv() : CallingConv::CC_C; diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp --- a/clang/lib/CodeGen/CGDeclCXX.cpp +++ b/clang/lib/CodeGen/CGDeclCXX.cpp @@ -553,7 +553,18 @@ CXXThreadLocalInits.push_back(Fn); CXXThreadLocalInitVars.push_back(D); } else if (PerformInit && ISA) { - EmitPointerToInitFunc(D, Addr, Fn, ISA); + // Contract with backend that "init_seg(compiler)" corresponds to priority + // 200 and "init_seg(lib)" corresponds to priority 400. 
+ int Priority = -1; + if (ISA->getSection() == ".CRT$XCC") + Priority = 200; + else if (ISA->getSection() == ".CRT$XCL") + Priority = 400; + + if (Priority != -1) + AddGlobalCtor(Fn, Priority, COMDATKey); + else + EmitPointerToInitFunc(D, Addr, Fn, ISA); } else if (auto *IPA = D->getAttr()) { OrderGlobalInitsOrStermFinalizers Key(IPA->getPriority(), PrioritizedCXXGlobalInits.size()); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -5338,7 +5338,7 @@ // Emit global alias debug information. if (isa(D)) if (CGDebugInfo *DI = getModuleDebugInfo()) - DI->EmitGlobalAlias(cast(GA->getAliasee()), GD); + DI->EmitGlobalAlias(cast(GA->getAliasee()->stripPointerCasts()), GD); } void CodeGenModule::emitIFuncDefinition(GlobalDecl GD) { diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -11499,6 +11499,56 @@ }; } // end anonymous namespace +//===----------------------------------------------------------------------===// +// BPF ABI Implementation +//===----------------------------------------------------------------------===// + +namespace { + +class BPFABIInfo : public DefaultABIInfo { +public: + BPFABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} + + ABIArgInfo classifyReturnType(QualType RetTy) const { + if (RetTy->isVoidType()) + return ABIArgInfo::getIgnore(); + + if (isAggregateTypeForABI(RetTy)) + return getNaturalAlignIndirect(RetTy); + + // Treat an enum type as its underlying type. + if (const EnumType *EnumTy = RetTy->getAs()) + RetTy = EnumTy->getDecl()->getIntegerType(); + + ASTContext &Context = getContext(); + if (const auto *EIT = RetTy->getAs()) + if (EIT->getNumBits() > Context.getTypeSize(Context.Int128Ty)) + return getNaturalAlignIndirect(RetTy); + + // Caller will do necessary sign/zero extension. 
+ return ABIArgInfo::getDirect(); + } + + void computeInfo(CGFunctionInfo &FI) const override { + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + for (auto &I : FI.arguments()) + I.info = classifyArgumentType(I.type); + } + +}; + +class BPFTargetCodeGenInfo : public TargetCodeGenInfo { +public: + BPFTargetCodeGenInfo(CodeGenTypes &CGT) + : TargetCodeGenInfo(std::make_unique(CGT)) {} + + const BPFABIInfo &getABIInfo() const { + return static_cast(TargetCodeGenInfo::getABIInfo()); + } +}; + +} + //===----------------------------------------------------------------------===// // Driver code //===----------------------------------------------------------------------===// @@ -11727,6 +11777,9 @@ : hasFP64 ? 64 : 32)); } + case llvm::Triple::bpfeb: + case llvm::Triple::bpfel: + return SetCGInfo(new BPFTargetCodeGenInfo(Types)); } } diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -896,12 +896,7 @@ bool MachO::HasNativeLLVMSupport() const { return true; } ToolChain::CXXStdlibType Darwin::GetDefaultCXXStdlibType() const { - // Use libstdc++ on old targets (OSX < 10.9 and iOS < 7) - if ((isTargetMacOSBased() && isMacosxVersionLT(10, 9)) || - (isTargetIOSBased() && isIPhoneOSVersionLT(7, 0))) - return ToolChain::CST_Libstdcxx; - - // On all other targets, use libc++ + // Always use libc++ by default return ToolChain::CST_Libcxx; } diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -2065,6 +2065,11 @@ if (PreviousNotConst->isSimpleTypeSpecifier()) return true; + // type[] a in Java + if (Style.Language == FormatStyle::LK_Java && + PreviousNotConst->is(tok::r_square)) + return true; + // const a = in JavaScript. 
return Style.isJavaScript() && PreviousNotConst->is(tok::kw_const); } diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp --- a/clang/lib/Frontend/TextDiagnostic.cpp +++ b/clang/lib/Frontend/TextDiagnostic.cpp @@ -332,8 +332,7 @@ return; // No special characters are allowed in CaretLine. - assert(CaretLine.end() == - llvm::find_if(CaretLine, [](char c) { return c < ' ' || '~' < c; })); + assert(llvm::none_of(CaretLine, [](char c) { return c < ' ' || '~' < c; })); // Find the slice that we need to display the full caret line // correctly. diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -11514,16 +11514,25 @@ if (CXXConstructorDecl *Constructor = dyn_cast(NewFD)) { CheckConstructor(Constructor); } else if (CXXDestructorDecl *Destructor = - dyn_cast(NewFD)) { - CXXRecordDecl *Record = Destructor->getParent(); - QualType ClassType = Context.getTypeDeclType(Record); - - DeclarationName Name = Context.DeclarationNames.getCXXDestructorName( - Context.getCanonicalType(ClassType)); - if (NewFD->getDeclName() != Name) { - Diag(NewFD->getLocation(), diag::err_destructor_name); - NewFD->setInvalidDecl(); - return Redeclaration; + dyn_cast(NewFD)) { + // We check here for invalid destructor names. 
+ // If we have a friend destructor declaration that is dependent, we can't + // diagnose right away because cases like this are still valid: + // template struct A { friend T::X::~Y(); }; + // struct B { struct Y { ~Y(); }; using X = Y; }; + // template struct A; + if (NewFD->getFriendObjectKind() == Decl::FriendObjectKind::FOK_None || + !Destructor->getThisType()->isDependentType()) { + CXXRecordDecl *Record = Destructor->getParent(); + QualType ClassType = Context.getTypeDeclType(Record); + + DeclarationName Name = Context.DeclarationNames.getCXXDestructorName( + Context.getCanonicalType(ClassType)); + if (NewFD->getDeclName() != Name) { + Diag(NewFD->getLocation(), diag::err_destructor_name); + NewFD->setInvalidDecl(); + return Redeclaration; + } } } else if (auto *Guide = dyn_cast(NewFD)) { if (auto *TD = Guide->getDescribedFunctionTemplate()) diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -4309,11 +4309,21 @@ } if (getLangOpts().MSVCCompat && !getLangOpts().CPlusPlus20) { - auto UnqualifiedBase = R.getAsSingle(); - if (UnqualifiedBase) { - Diag(IdLoc, diag::ext_unqualified_base_class) - << SourceRange(IdLoc, Init->getSourceRange().getEnd()); - BaseType = UnqualifiedBase->getInjectedClassNameSpecialization(); + if (auto UnqualifiedBase = R.getAsSingle()) { + auto *TempSpec = cast( + UnqualifiedBase->getInjectedClassNameSpecialization()); + TemplateName TN = TempSpec->getTemplateName(); + for (auto const &Base : ClassDecl->bases()) { + auto BaseTemplate = + Base.getType()->getAs(); + if (BaseTemplate && Context.hasSameTemplateName( + BaseTemplate->getTemplateName(), TN)) { + Diag(IdLoc, diag::ext_unqualified_base_class) + << SourceRange(IdLoc, Init->getSourceRange().getEnd()); + BaseType = Base.getType(); + break; + } + } } } diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp --- a/clang/lib/Sema/SemaExpr.cpp +++ 
b/clang/lib/Sema/SemaExpr.cpp @@ -13957,8 +13957,10 @@ return getLangOpts().CPlusPlus ? LHSType : LHSType.getAtomicUnqualifiedType(); } -// Only ignore explicit casts to void. -static bool IgnoreCommaOperand(const Expr *E) { +// Scenarios to ignore if expression E is: +// 1. an explicit cast expression into void +// 2. a function call expression that returns void +static bool IgnoreCommaOperand(const Expr *E, const ASTContext &Context) { E = E->IgnoreParens(); if (const CastExpr *CE = dyn_cast(E)) { @@ -13973,6 +13975,8 @@ } } + if (const auto *CE = dyn_cast(E)) + return CE->getCallReturnType(Context)->isVoidType(); return false; } @@ -14014,7 +14018,7 @@ } // Only allow some expressions on LHS to not warn. - if (IgnoreCommaOperand(LHS)) + if (IgnoreCommaOperand(LHS, Context)) return; Diag(Loc, diag::warn_comma_operator); diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -3788,9 +3788,8 @@ // Variable is used if it has been marked as an array, array // section, array shaping or the variable iself. 
return StackComponents.size() == 1 || - std::all_of( - std::next(StackComponents.rbegin()), - StackComponents.rend(), + llvm::all_of( + llvm::drop_begin(llvm::reverse(StackComponents)), [](const OMPClauseMappableExprCommon:: MappableComponent &MC) { return MC.getAssociatedDeclaration() == diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp --- a/clang/lib/Sema/SemaTemplateDeduction.cpp +++ b/clang/lib/Sema/SemaTemplateDeduction.cpp @@ -2445,6 +2445,9 @@ if (X.getKind() != Y.getKind()) return false; + bool ClangABICompat14 = + Context.getLangOpts().getClangABICompat() <= LangOptions::ClangABI::Ver14; + switch (X.getKind()) { case TemplateArgument::Null: llvm_unreachable("Comparing NULL template argument"); @@ -2477,30 +2480,42 @@ } case TemplateArgument::Pack: - unsigned PackIterationSize = X.pack_size(); - if (X.pack_size() != Y.pack_size()) { - if (!PartialOrdering) + if (ClangABICompat14) { + if (X.pack_size() != Y.pack_size()) return false; - // C++0x [temp.deduct.type]p9: - // During partial ordering, if Ai was originally a pack expansion: - // - if P does not contain a template argument corresponding to Ai then - // Ai is ignored; - bool XHasMoreArg = X.pack_size() > Y.pack_size(); - if (!(XHasMoreArg && X.pack_elements().back().isPackExpansion()) && - !(!XHasMoreArg && Y.pack_elements().back().isPackExpansion())) - return false; + for (TemplateArgument::pack_iterator XP = X.pack_begin(), + XPEnd = X.pack_end(), + YP = Y.pack_begin(); + XP != XPEnd; ++XP, ++YP) + if (!isSameTemplateArg(Context, *XP, *YP, PackExpansionMatchesPack)) + return false; + } else { + unsigned PackIterationSize = X.pack_size(); + if (X.pack_size() != Y.pack_size()) { + if (!PartialOrdering) + return false; + + // C++0x [temp.deduct.type]p9: + // During partial ordering, if Ai was originally a pack expansion: + // - if P does not contain a template argument corresponding to Ai + // then Ai is ignored; + bool XHasMoreArg = X.pack_size() > 
Y.pack_size(); + if (!(XHasMoreArg && X.pack_elements().back().isPackExpansion()) && + !(!XHasMoreArg && Y.pack_elements().back().isPackExpansion())) + return false; + + if (XHasMoreArg) + PackIterationSize = Y.pack_size(); + } - if (XHasMoreArg) - PackIterationSize = Y.pack_size(); + ArrayRef XP = X.pack_elements(); + ArrayRef YP = Y.pack_elements(); + for (unsigned i = 0; i < PackIterationSize; ++i) + if (!isSameTemplateArg(Context, XP[i], YP[i], PartialOrdering, + PackExpansionMatchesPack)) + return false; } - - ArrayRef XP = X.pack_elements(); - ArrayRef YP = Y.pack_elements(); - for (unsigned i = 0; i < PackIterationSize; ++i) - if (!isSameTemplateArg(Context, XP[i], YP[i], PartialOrdering, - PackExpansionMatchesPack)) - return false; return true; } diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -5792,6 +5792,7 @@ = dyn_cast(OldType)) { // We have a function parameter pack that may need to be expanded. 
QualType Pattern = Expansion->getPattern(); + NumExpansions = Expansion->getNumExpansions(); SmallVector Unexpanded; getSema().collectUnexpandedParameterPacks(Pattern, Unexpanded); diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp @@ -13,18 +13,12 @@ using namespace tooling; using namespace dependencies; -std::vector FullDependencies::getCommandLine( - llvm::function_ref - LookupModuleOutput) const { +static std::vector +makeTUCommandLineWithoutPaths(ArrayRef OriginalCommandLine) { std::vector Args = OriginalCommandLine; Args.push_back("-fno-implicit-modules"); Args.push_back("-fno-implicit-module-maps"); - for (const PrebuiltModuleDep &PMD : PrebuiltModuleDeps) - Args.push_back("-fmodule-file=" + PMD.PCMFile); - for (ModuleID MID : ClangModuleDeps) - Args.push_back("-fmodule-file=" + - LookupModuleOutput(MID, ModuleOutputKind::ModuleFile)); // These arguments are unused in explicit compiles. 
llvm::erase_if(Args, [](StringRef Arg) { @@ -72,6 +66,11 @@ void handleContextHash(std::string Hash) override {} + std::string lookupModuleOutput(const ModuleID &ID, + ModuleOutputKind Kind) override { + llvm::report_fatal_error("unexpected call to lookupModuleOutput"); + } + void printDependencies(std::string &S) { assert(Opts && "Handled dependency output options."); @@ -113,8 +112,9 @@ DependencyScanningTool::getFullDependencies( const std::vector &CommandLine, StringRef CWD, const llvm::StringSet<> &AlreadySeen, + LookupModuleOutputCallback LookupModuleOutput, llvm::Optional ModuleName) { - FullDependencyConsumer Consumer(AlreadySeen); + FullDependencyConsumer Consumer(AlreadySeen, LookupModuleOutput); llvm::Error Result = Worker.computeDependencies(CWD, CommandLine, Consumer, ModuleName); if (Result) @@ -126,16 +126,24 @@ const std::vector &OriginalCommandLine) const { FullDependencies FD; - FD.OriginalCommandLine = ArrayRef(OriginalCommandLine).slice(1); + FD.CommandLine = makeTUCommandLineWithoutPaths( + ArrayRef(OriginalCommandLine).slice(1)); FD.ID.ContextHash = std::move(ContextHash); FD.FileDeps.assign(Dependencies.begin(), Dependencies.end()); + for (const PrebuiltModuleDep &PMD : PrebuiltModuleDeps) + FD.CommandLine.push_back("-fmodule-file=" + PMD.PCMFile); + for (auto &&M : ClangModuleDeps) { auto &MD = M.second; - if (MD.ImportedByMainFile) + if (MD.ImportedByMainFile) { FD.ClangModuleDeps.push_back(MD.ID); + FD.CommandLine.push_back( + "-fmodule-file=" + + LookupModuleOutput(MD.ID, ModuleOutputKind::ModuleFile)); + } } FD.PrebuiltModuleDeps = std::move(PrebuiltModuleDeps); diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp --- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp +++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp @@ -42,7 +42,47 @@ Opts.UserEntries.push_back(Entries[Idx]); } -CompilerInvocation 
ModuleDepCollector::makeInvocationForModuleBuildWithoutPaths( +static std::vector splitString(std::string S, char Separator) { + SmallVector Segments; + StringRef(S).split(Segments, Separator, /*MaxSplit=*/-1, /*KeepEmpty=*/false); + std::vector Result; + Result.reserve(Segments.size()); + for (StringRef Segment : Segments) + Result.push_back(Segment.str()); + return Result; +} + +void ModuleDepCollector::addOutputPaths(ModuleDeps &Deps) { + CompilerInvocation &CI = Deps.BuildInvocation; + for (ModuleID MID : Deps.ClangModuleDeps) + CI.getFrontendOpts().ModuleFiles.push_back( + Consumer.lookupModuleOutput(MID, ModuleOutputKind::ModuleFile)); + + CI.getFrontendOpts().OutputFile = + Consumer.lookupModuleOutput(Deps.ID, ModuleOutputKind::ModuleFile); + if (!CI.getDiagnosticOpts().DiagnosticSerializationFile.empty()) + CI.getDiagnosticOpts().DiagnosticSerializationFile = + Consumer.lookupModuleOutput( + Deps.ID, ModuleOutputKind::DiagnosticSerializationFile); + if (!CI.getDependencyOutputOpts().OutputFile.empty()) { + CI.getDependencyOutputOpts().OutputFile = + Consumer.lookupModuleOutput(Deps.ID, ModuleOutputKind::DependencyFile); + CI.getDependencyOutputOpts().Targets = + splitString(Consumer.lookupModuleOutput( + Deps.ID, ModuleOutputKind::DependencyTargets), + '\0'); + if (!CI.getDependencyOutputOpts().OutputFile.empty() && + CI.getDependencyOutputOpts().Targets.empty()) { + // Fallback to -o as dependency target, as in the driver. + SmallString<128> Target; + quoteMakeTarget(CI.getFrontendOpts().OutputFile, Target); + CI.getDependencyOutputOpts().Targets.push_back(std::string(Target)); + } + } +} + +CompilerInvocation +ModuleDepCollector::makeInvocationForModuleBuildWithoutOutputs( const ModuleDeps &Deps, llvm::function_ref Optimize) const { // Make a deep copy of the original Clang invocation. 
@@ -58,8 +98,12 @@ CI.getFrontendOpts().OutputFile.clear(); CI.getCodeGenOpts().MainFileName.clear(); CI.getCodeGenOpts().DwarfDebugFlags.clear(); - CI.getDiagnosticOpts().DiagnosticSerializationFile.clear(); - CI.getDependencyOutputOpts().OutputFile.clear(); + // Map output paths that affect behaviour to "-" so their existence is in the + // context hash. The final path will be computed in addOutputPaths. + if (!CI.getDiagnosticOpts().DiagnosticSerializationFile.empty()) + CI.getDiagnosticOpts().DiagnosticSerializationFile = "-"; + if (!CI.getDependencyOutputOpts().OutputFile.empty()) + CI.getDependencyOutputOpts().OutputFile = "-"; CI.getDependencyOutputOpts().Targets.clear(); CI.getFrontendOpts().ProgramAction = frontend::GenerateModule; @@ -78,6 +122,17 @@ CI.getHeaderSearchOpts().ModuleCachePruneInterval = 7 * 24 * 60 * 60; CI.getHeaderSearchOpts().ModuleCachePruneAfter = 31 * 24 * 60 * 60; + // Inputs + InputKind ModuleMapInputKind(CI.getFrontendOpts().DashX.getLanguage(), + InputKind::Format::ModuleMap); + CI.getFrontendOpts().Inputs.emplace_back(Deps.ClangModuleMapFile, + ModuleMapInputKind); + CI.getFrontendOpts().ModuleMapFiles = Deps.ModuleMapFileDeps; + + // Report the prebuilt modules this module uses. + for (const auto &PrebuiltModule : Deps.PrebuiltModuleDeps) + CI.getFrontendOpts().ModuleFiles.push_back(PrebuiltModule.PCMFile); + // Remove any macro definitions that are explicitly ignored. if (!CI.getHeaderSearchOpts().ModulesIgnoreMacros.empty()) { llvm::erase_if( @@ -91,12 +146,6 @@ CI.getHeaderSearchOpts().ModulesIgnoreMacros.clear(); } - // Report the prebuilt modules this module uses. - for (const auto &PrebuiltModule : Deps.PrebuiltModuleDeps) - CI.getFrontendOpts().ModuleFiles.push_back(PrebuiltModule.PCMFile); - - CI.getFrontendOpts().ModuleMapFiles = Deps.ModuleMapFileDeps; - Optimize(CI); // The original invocation probably didn't have strict context hash enabled. 
@@ -125,49 +174,8 @@ return std::vector{Args.begin(), Args.end()}; } -static std::vector splitString(std::string S, char Separator) { - SmallVector Segments; - StringRef(S).split(Segments, Separator, /*MaxSplit=*/-1, /*KeepEmpty=*/false); - std::vector Result; - Result.reserve(Segments.size()); - for (StringRef Segment : Segments) - Result.push_back(Segment.str()); - return Result; -} - -std::vector ModuleDeps::getCanonicalCommandLine( - llvm::function_ref - LookupModuleOutput) const { - CompilerInvocation CI(BuildInvocation); - FrontendOptions &FrontendOpts = CI.getFrontendOpts(); - - InputKind ModuleMapInputKind(FrontendOpts.DashX.getLanguage(), - InputKind::Format::ModuleMap); - FrontendOpts.Inputs.emplace_back(ClangModuleMapFile, ModuleMapInputKind); - FrontendOpts.OutputFile = - LookupModuleOutput(ID, ModuleOutputKind::ModuleFile); - if (HadSerializedDiagnostics) - CI.getDiagnosticOpts().DiagnosticSerializationFile = - LookupModuleOutput(ID, ModuleOutputKind::DiagnosticSerializationFile); - if (HadDependencyFile) { - DependencyOutputOptions &DepOpts = CI.getDependencyOutputOpts(); - DepOpts.OutputFile = - LookupModuleOutput(ID, ModuleOutputKind::DependencyFile); - DepOpts.Targets = splitString( - LookupModuleOutput(ID, ModuleOutputKind::DependencyTargets), '\0'); - if (!DepOpts.OutputFile.empty() && DepOpts.Targets.empty()) { - // Fallback to -o as dependency target, as in the driver. 
- SmallString<128> Target; - quoteMakeTarget(FrontendOpts.OutputFile, Target); - DepOpts.Targets.push_back(std::string(Target)); - } - } - - for (ModuleID MID : ClangModuleDeps) - FrontendOpts.ModuleFiles.push_back( - LookupModuleOutput(MID, ModuleOutputKind::ModuleFile)); - - return serializeCompilerInvocation(CI); +std::vector ModuleDeps::getCanonicalCommandLine() const { + return serializeCompilerInvocation(BuildInvocation); } static std::string getModuleContextHash(const ModuleDeps &MD) { @@ -190,23 +198,16 @@ return ""; }); - // Hash the input file paths and module dependencies. These paths may differ - // even if the invocation is identical if they depend on the contents of the - // files in the TU -- for example, case-insensitive paths to modulemap files. - // Usually such a case would indicate a missed optimization to canonicalize, - // but it may be difficult to canonicalize all cases when there is a VFS. - HashBuilder.add(MD.ClangModuleMapFile); - for (const auto &Dep : MD.PrebuiltModuleDeps) - HashBuilder.add(Dep.PCMFile); + // Hash the module dependencies. These paths may differ even if the invocation + // is identical if they depend on the contents of the files in the TU -- for + // example, case-insensitive paths to modulemap files. Usually such a case + // would indicate a missed optimization to canonicalize, but it may be + // difficult to canonicalize all cases when there is a VFS. for (const auto &ID : MD.ClangModuleDeps) { HashBuilder.add(ID.ModuleName); HashBuilder.add(ID.ContextHash); } - // Hash options that affect which callbacks are made for outputs. 
- HashBuilder.add(MD.HadDependencyFile); - HashBuilder.add(MD.HadSerializedDiagnostics); - llvm::BLAKE3Result<16> Hash = HashBuilder.final(); std::array Words; static_assert(sizeof(Hash) == sizeof(Words), "Hash must match Words"); @@ -387,22 +388,20 @@ llvm::DenseSet SeenModules; addAllSubmodulePrebuiltDeps(M, MD, SeenModules); - MD.BuildInvocation = MDC.makeInvocationForModuleBuildWithoutPaths( + MD.BuildInvocation = MDC.makeInvocationForModuleBuildWithoutOutputs( MD, [&](CompilerInvocation &BuildInvocation) { if (MDC.OptimizeArgs) optimizeHeaderSearchOpts(BuildInvocation.getHeaderSearchOpts(), *MDC.ScanInstance.getASTReader(), *MF); }); - MD.HadSerializedDiagnostics = !MDC.OriginalInvocation.getDiagnosticOpts() - .DiagnosticSerializationFile.empty(); - MD.HadDependencyFile = - !MDC.OriginalInvocation.getDependencyOutputOpts().OutputFile.empty(); llvm::DenseSet AddedModules; addAllSubmoduleDeps(M, MD, AddedModules); - // Do this last since it requires the dependencies. + // Compute the context hash from the inputs. Requires dependencies. MD.ID.ContextHash = getModuleContextHash(MD); + // Finish the compiler invocation. Requires dependencies and the context hash. + MDC.addOutputPaths(MD); return MD.ID; } diff --git a/clang/test/CXX/temp/temp.decls/temp.variadic/p5.cpp b/clang/test/CXX/temp/temp.decls/temp.variadic/p5.cpp --- a/clang/test/CXX/temp/temp.decls/temp.variadic/p5.cpp +++ b/clang/test/CXX/temp/temp.decls/temp.variadic/p5.cpp @@ -469,3 +469,25 @@ bar(b); } } + +namespace pr56094 { +template struct D { + template using B = int(int (*...p)(T, U)); + // expected-error@-1 {{pack expansion contains parameter pack 'U' that has a different length (1 vs. 
2) from outer parameter packs}} + template D(B *); + // expected-note@-1 {{in instantiation of template type alias 'B' requested here}} +}; +using t1 = D::B; +// expected-note@-1 {{in instantiation of template class 'pr56094::D' requested here}} + +template struct F {}; +template struct G {}; +template struct E { + template using B = G...>; + // expected-error@-1 {{pack expansion contains parameter pack 'U' that has a different length (1 vs. 2) from outer parameter packs}} + template E(B *); + // expected-note@-1 {{in instantiation of template type alias 'B' requested here}} +}; +using t2 = E::B; +// expected-note@-1 {{in instantiation of template class 'pr56094::E' requested here}} +} // namespace pr56094 diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat.c --- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat.c @@ -21,29 +21,38 @@ void test() { // CHECK-LABEL: @test( // CHECK-NEXT: entry: -// CHECK-LE-LABEL: @test( -// CHECK-LE-NEXT: entry: +// NOCOMPAT-LABEL: @test( +// NOCOMPAT-NEXT: entry: res_vf = vec_ctf(vsll, 4); // CHECK: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* @vsll, align 16 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.ppc.vsx.xvcvsxdsp(<2 x i64> [[TMP0]]) // CHECK-NEXT: fmul <4 x float> [[TMP1]], +// NOCOMPAT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* @vsll, align 16 +// NOCOMPAT-NEXT: [[CONV:%.*]] = sitofp <2 x i64> [[TMP0]] to <2 x double> +// NOCOMPAT-NEXT: fmul <2 x double> [[CONV]], res_vf = vec_ctf(vull, 4); // CHECK: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* @vull, align 16 // CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.ppc.vsx.xvcvuxdsp(<2 x i64> [[TMP2]]) // CHECK-NEXT: fmul <4 x float> [[TMP3]], +// NOCOMPAT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* @vull, align 16 +// NOCOMPAT-NEXT: [[CONV1:%.*]] = uitofp <2 x i64> [[TMP2]] to <2 x double> +// NOCOMPAT-NEXT: fmul <2 x double> [[CONV1]], res_vsll = 
vec_cts(vd, 4); // CHECK: [[TMP4:%.*]] = load <2 x double>, <2 x double>* @vd, align 16 // CHECK-NEXT: fmul <2 x double> [[TMP4]], // CHECK: call <4 x i32> @llvm.ppc.vsx.xvcvdpsxws(<2 x double> +// NOCOMPAT: [[TMP4:%.*]] = load <2 x double>, <2 x double>* @vd, align 16 +// NOCOMPAT-NEXT: fmul <2 x double> [[TMP4]], res_vull = vec_ctu(vd, 4); // CHECK: [[TMP8:%.*]] = load <2 x double>, <2 x double>* @vd, align 16 // CHECK-NEXT: fmul <2 x double> [[TMP8]], // CHECK: call <4 x i32> @llvm.ppc.vsx.xvcvdpuxws(<2 x double> -// NONCOMPAT: call <4 x i32> @llvm.ppc.vsx.xvcvdpuxws(<2 x double> +// NOCOMPAT: [[TMP7:%.*]] = load <2 x double>, <2 x double>* @vd, align 16 +// NOCOMPAT-NEXT: fmul <2 x double> [[TMP7]], res_vd = vec_round(vd); // CHECK: call double @llvm.ppc.readflm() diff --git a/clang/test/CodeGen/bpf-abiinfo.c b/clang/test/CodeGen/bpf-abiinfo.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/bpf-abiinfo.c @@ -0,0 +1,24 @@ +// REQUIRES: bpf-registered-target +// RUN: %clang_cc1 -triple bpf -O2 -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s + +_Bool bar_bool(void); +unsigned char bar_char(void); +short bar_short(void); +int bar_int(void); + +int foo_bool(void) { + if (bar_bool() != 1) return 0; else return 1; +} +// CHECK: %call = call i1 @bar_bool() +int foo_char(void) { + if (bar_char() != 10) return 0; else return 1; +} +// CHECK: %call = call i8 @bar_char() +int foo_short(void) { + if (bar_short() != 10) return 0; else return 1; +} +// CHECK: %call = call i16 @bar_short() +int foo_int(void) { + if (bar_int() != 10) return 0; else return 1; +} +// CHECK: %call = call i32 @bar_int() diff --git a/clang/test/CodeGen/debug-info-alias-pointer.c b/clang/test/CodeGen/debug-info-alias-pointer.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/debug-info-alias-pointer.c @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-windows-msvc -emit-llvm -debug-info-kind=limited %s -o - | FileCheck %s +// REQUIRES: asserts + +struct S { + 
void *p; +}; + +struct S s[] = { + { .p = (void *)0, }, +}; + +extern struct S t __attribute__((__alias__("s"))); + +// CHECK: !DIImportedEntity(tag: DW_TAG_imported_declaration, name: "t", scope: {{.*}}, entity: {{.*}}, file: {{.*}}, line: 12) diff --git a/clang/test/CodeGen/partial-order-variadic.cpp b/clang/test/CodeGen/partial-order-variadic.cpp --- a/clang/test/CodeGen/partial-order-variadic.cpp +++ b/clang/test/CodeGen/partial-order-variadic.cpp @@ -3,20 +3,28 @@ #if defined(CLANG_ABI_COMPAT) && CLANG_ABI_COMPAT <= 14 +// CHECK-14: %"struct.temp_func_order_example3::S" = type { i8 } + // CHECK-14: define dso_local void @_ZN24temp_func_order_example31hEi(i32 noundef %i) -// CHECK-14-NEXT: entry: -// CHECK-14-NEXT: %i.addr = alloca i32, align 4 -// CHECK-14-NEXT: %r = alloca ptr, align 8 -// CHECK-14-NEXT: store i32 %i, ptr %i.addr, align 4 -// CHECK-14-NEXT: %call = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN24temp_func_order_example31gIiJEEERiPT_DpT0_(ptr noundef %i.addr) -// CHECK-14-NEXT: store ptr %call, ptr %r, align 8 -// CHECK-14-NEXT: ret void +// CHECK-14-NEXT: entry: +// CHECK-14-NEXT: %i.addr = alloca i32, align 4 +// CHECK-14-NEXT: %r = alloca ptr, align 8 +// CHECK-14-NEXT: %a = alloca %"struct.temp_func_order_example3::S", align 1 +// CHECK-14-NEXT: store i32 %i, ptr %i.addr, align 4 +// CHECK-14-NEXT: %call = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN24temp_func_order_example31gIiJEEERiPT_DpT0_(ptr noundef %i.addr) +// CHECK-14-NEXT: store ptr %call, ptr %r, align 8 +// CHECK-14-NEXT: ret void namespace temp_func_order_example3 { template int &g(T *, U...); template void g(T); + + template struct S; + template struct S {}; + void h(int i) { int &r = g(&i); + S a; } } diff --git a/clang/test/CodeGenCXX/debug-info-auto-return.cpp b/clang/test/CodeGenCXX/debug-info-auto-return.cpp --- a/clang/test/CodeGenCXX/debug-info-auto-return.cpp +++ b/clang/test/CodeGenCXX/debug-info-auto-return.cpp @@ -2,17 +2,20 @@ // RUN: 
%clang_cc1 -dwarf-version=5 -emit-llvm -triple x86_64-linux-gnu %s -o - \ // RUN: -O0 -disable-llvm-passes \ // RUN: -debug-info-kind=standalone \ -// RUN: | FileCheck %s +// RUN: | FileCheck --implicit-check-not="\"auto\"" --implicit-check-not=DISubprogram %s -// CHECK: !DISubprogram(name: "findMax",{{.*}}, type: ![[FUN_TYPE:[0-9]+]],{{.*}} +// CHECK: !DISubprogram(name: "findMax",{{.*}}, type: [[FUN_TYPE:![0-9]+]], {{.*}}spFlags: DISPFlagDefinition, {{.*}} declaration: [[DECL:![0-9]+]] -// CHECK: ![[FUN_TYPE]] = !DISubroutineType(types: ![[TYPE_NODE:[0-9]+]]) -// CHECK-NEXT: ![[TYPE_NODE]] = !{![[DOUBLE_TYPE:[0-9]+]], {{.*}} -// CHECK-NEXT: ![[DOUBLE_TYPE]] = !DIBasicType(name: "double", {{.*}}) +// CHECK: !DICompositeType(tag: DW_TAG_structure_type, name: "myClass", +// CHECK-SAME: elements: [[MEMBERS:![0-9]+]], + +// CHECK: [[MEMBERS]] = !{} + +// CHECK: [[FUN_TYPE]] = !DISubroutineType(types: [[TYPE_NODE:![0-9]+]]) +// CHECK-NEXT: [[TYPE_NODE]] = !{[[DOUBLE_TYPE:![0-9]+]], +// CHECK-NEXT: [[DOUBLE_TYPE]] = !DIBasicType(name: "double", +// CHECK: [[DECL]] = !DISubprogram(name: "findMax",{{.*}}, type: [[FUN_TYPE]], -// CHECK: !DISubroutineType(types: ![[TYPE_DECL_NODE:[0-9]+]]) -// CHECK-NEXT: ![[TYPE_DECL_NODE]] = !{![[AUTO_TYPE:[0-9]+]], {{.*}} -// CHECK-NEXT: ![[AUTO_TYPE]] = !DIBasicType(tag: DW_TAG_unspecified_type, name: "auto") struct myClass { auto findMax(); }; diff --git a/clang/test/CodeGenCXX/no_auto_return_lambda.cpp b/clang/test/CodeGenCXX/no_auto_return_lambda.cpp deleted file mode 100644 --- a/clang/test/CodeGenCXX/no_auto_return_lambda.cpp +++ /dev/null @@ -1,25 +0,0 @@ -// RUN: %clang_cc1 -emit-llvm -debug-info-kind=limited %s -o - | FileCheck %s - -// We emit "auto" for deduced return types for member functions but we should -// not emitting "auto" for deduced return types for lambdas call function which -// will be implmented as operator() in a class type. This test will verify that -// behavior. 
- -__attribute__((used)) int g() { - auto f = []() { return 10; }; - return f(); -} - -// g() is not a member function so we should not emit "auto" for the deduced -// return type. -// -// CHECK: !DISubprogram(name: "g",{{.*}}, type: ![[FUN_TYPE:[0-9]+]],{{.*}} -// CHECK: ![[FUN_TYPE]] = !DISubroutineType(types: ![[TYPE_NODE:[0-9]+]]) -// CHECK: ![[TYPE_NODE]] = !{![[INT_TYPE:[0-9]+]]} -// CHECK: ![[INT_TYPE]] = !DIBasicType(name: "int", {{.*}}) - -// operator() of the local lambda should have the same return type as g() -// -// CHECK: distinct !DISubprogram(name: "operator()",{{.*}}, type: ![[FUN_TYPE_LAMBDA:[0-9]+]],{{.*}} -// CHECK: ![[FUN_TYPE_LAMBDA]] = !DISubroutineType({{.*}}types: ![[TYPE_NODE_LAMBDA:[0-9]+]]) -// CHECK: ![[TYPE_NODE_LAMBDA]] = !{![[INT_TYPE]], {{.*}} diff --git a/clang/test/CodeGenCXX/pragma-init_seg.cpp b/clang/test/CodeGenCXX/pragma-init_seg.cpp --- a/clang/test/CodeGenCXX/pragma-init_seg.cpp +++ b/clang/test/CodeGenCXX/pragma-init_seg.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -no-opaque-pointers %s -triple=i686-pc-win32 -fms-extensions -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -triple=i686-pc-win32 -fms-extensions -emit-llvm -o - | FileCheck %s int f(); @@ -10,12 +10,12 @@ #pragma init_seg(compiler) int x = f(); // CHECK: @"?x@simple_init@@3HA" = dso_local global i32 0, align 4 -// CHECK: @__cxx_init_fn_ptr = private constant void ()* @"??__Ex@simple_init@@YAXXZ", section ".CRT$XCC" +// No function pointer! This one goes on @llvm.global_ctors. #pragma init_seg(lib) int y = f(); // CHECK: @"?y@simple_init@@3HA" = dso_local global i32 0, align 4 -// CHECK: @__cxx_init_fn_ptr.1 = private constant void ()* @"??__Ey@simple_init@@YAXXZ", section ".CRT$XCL" +// No function pointer! This one goes on @llvm.global_ctors. 
#pragma init_seg(user) int z = f(); @@ -29,14 +29,14 @@ namespace { int x = f(); // CHECK: @"?x@?A0x{{[^@]*}}@internal_init@@3HA" = internal global i32 0, align 4 -// CHECK: @__cxx_init_fn_ptr.2 = private constant void ()* @"??__Ex@?A0x{{[^@]*}}@internal_init@@YAXXZ", section ".asdf" +// CHECK: @__cxx_init_fn_ptr = private constant ptr @"??__Ex@?A0x{{[^@]*}}@internal_init@@YAXXZ", section ".asdf" } } namespace selectany_init { int __declspec(selectany) x = f(); // CHECK: @"?x@selectany_init@@3HA" = weak_odr dso_local global i32 0, comdat, align 4 -// CHECK: @__cxx_init_fn_ptr.3 = private constant void ()* @"??__Ex@selectany_init@@YAXXZ", section ".asdf", comdat($"?x@selectany_init@@3HA") +// CHECK: @__cxx_init_fn_ptr.1 = private constant ptr @"??__Ex@selectany_init@@YAXXZ", section ".asdf", comdat($"?x@selectany_init@@3HA") } namespace explicit_template_instantiation { @@ -44,7 +44,7 @@ template const int A::x = f(); template struct A; // CHECK: @"?x@?$A@H@explicit_template_instantiation@@2HB" = weak_odr dso_local global i32 0, comdat, align 4 -// CHECK: @__cxx_init_fn_ptr.4 = private constant void ()* @"??__E?x@?$A@H@explicit_template_instantiation@@2HB@@YAXXZ", section ".asdf", comdat($"?x@?$A@H@explicit_template_instantiation@@2HB") +// CHECK: @__cxx_init_fn_ptr.2 = private constant ptr @"??__E?x@?$A@H@explicit_template_instantiation@@2HB@@YAXXZ", section ".asdf", comdat($"?x@?$A@H@explicit_template_instantiation@@2HB") } namespace implicit_template_instantiation { @@ -52,21 +52,19 @@ template const int A::x = f(); int g() { return A::x; } // CHECK: @"?x@?$A@H@implicit_template_instantiation@@2HB" = linkonce_odr dso_local global i32 0, comdat, align 4 -// CHECK: @__cxx_init_fn_ptr.5 = private constant void ()* @"??__E?x@?$A@H@implicit_template_instantiation@@2HB@@YAXXZ", section ".asdf", comdat($"?x@?$A@H@implicit_template_instantiation@@2HB") +// CHECK: @__cxx_init_fn_ptr.3 = private constant ptr @"??__E?x@?$A@H@implicit_template_instantiation@@2HB@@YAXXZ", 
section ".asdf", comdat($"?x@?$A@H@implicit_template_instantiation@@2HB") } // ... and here's where we emitted user level ctors. -// CHECK: @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] -// CHECK: [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_pragma_init_seg.cpp, i8* null }] +// CHECK: @llvm.global_ctors = appending global [3 x { i32, ptr, ptr }] +// CHECK: [{ i32, ptr, ptr } { i32 200, ptr @"??__Ex@simple_init@@YAXXZ", ptr @"?x@simple_init@@3HA" }, { i32, ptr, ptr } { i32 400, ptr @"??__Ey@simple_init@@YAXXZ", ptr @"?y@simple_init@@3HA" }, { i32, ptr, ptr } { i32 65535, ptr @_GLOBAL__sub_I_pragma_init_seg.cpp, ptr null }] // We have to mark everything used so we can survive globalopt, even through // LTO. There's no way LLVM could really understand if data in the .asdf // section is really used or dead. // -// CHECK: @llvm.used = appending global [6 x i8*] -// CHECK: [i8* bitcast (void ()** @__cxx_init_fn_ptr to i8*), -// CHECK: i8* bitcast (void ()** @__cxx_init_fn_ptr.1 to i8*), -// CHECK: i8* bitcast (void ()** @__cxx_init_fn_ptr.2 to i8*), -// CHECK: i8* bitcast (void ()** @__cxx_init_fn_ptr.3 to i8*), -// CHECK: i8* bitcast (void ()** @__cxx_init_fn_ptr.4 to i8*), -// CHECK: i8* bitcast (void ()** @__cxx_init_fn_ptr.5 to i8*)], section "llvm.metadata" +// CHECK: @llvm.used = appending global [4 x ptr] +// CHECK: [ptr @__cxx_init_fn_ptr, +// CHECK: ptr @__cxx_init_fn_ptr.1, +// CHECK: ptr @__cxx_init_fn_ptr.2, +// CHECK: ptr @__cxx_init_fn_ptr.3], section "llvm.metadata" diff --git a/clang/test/CodeGenCXX/threadlocal_address.cpp b/clang/test/CodeGenCXX/threadlocal_address.cpp --- a/clang/test/CodeGenCXX/threadlocal_address.cpp +++ b/clang/test/CodeGenCXX/threadlocal_address.cpp @@ -20,11 +20,11 @@ // CHECK-NEXT: %[[RET:.+]] = load i32, ptr %[[IA2]], align 4 // CHECK-NEXT: ret i32 %[[RET]] // -// CHECK: declare ptr @llvm.threadlocal.address.p0(ptr) #[[ATTR_NUM:.+]] +// CHECK: declare nonnull ptr 
@llvm.threadlocal.address.p0(ptr nonnull) #[[ATTR_NUM:.+]] // // CHECK-O1-LABEL: @_Z1gv // CHECK-O1-NEXT: entry: -// CHECK-O1-NEXT: %[[I_ADDR:.+]] = {{.*}}call ptr @llvm.threadlocal.address.p0(ptr nonnull @i) +// CHECK-O1-NEXT: %[[I_ADDR:.+]] = {{.*}}call ptr @llvm.threadlocal.address.p0(ptr @i) // CHECK-O1-NEXT: %[[VAL:.+]] = load i32, ptr %[[I_ADDR]] // CHECK-O1-NEXT: %[[INC:.+]] = add nsw i32 %[[VAL]], 1 // CHECK-O1-NEXT: store i32 %[[INC]], ptr %[[I_ADDR]] @@ -56,7 +56,7 @@ // // CHECK-O1-LABEL: @_Z1fv // CHECK-O1-NEXT: entry: -// CHECK-O1-NEXT: %[[J_ADDR:.+]] = {{.*}}call ptr @llvm.threadlocal.address.p0(ptr nonnull @_ZZ1fvE1j) +// CHECK-O1-NEXT: %[[J_ADDR:.+]] = {{.*}}call ptr @llvm.threadlocal.address.p0(ptr @_ZZ1fvE1j) // CHECK-O1-NEXT: %[[VAL:.+]] = load i32, ptr %[[J_ADDR]] // CHECK-O1-NEXT: %[[INC:.+]] = add nsw i32 %[[VAL]], 1 // CHECK-O1-NEXT: store i32 %[[INC]], ptr %[[J_ADDR]] diff --git a/clang/test/Driver/Xarch.c b/clang/test/Driver/Xarch.c --- a/clang/test/Driver/Xarch.c +++ b/clang/test/Driver/Xarch.c @@ -1,12 +1,12 @@ -// RUN: %clang -target i386-apple-darwin9 -m32 -Xarch_i386 -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3ONCE %s +// RUN: %clang -target i386-apple-darwin11 -m32 -Xarch_i386 -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3ONCE %s // O3ONCE: "-O3" // O3ONCE-NOT: "-O3" -// RUN: %clang -target i386-apple-darwin9 -m64 -Xarch_i386 -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3NONE %s +// RUN: %clang -target i386-apple-darwin11 -m64 -Xarch_i386 -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3NONE %s // O3NONE-NOT: "-O3" // O3NONE: argument unused during compilation: '-Xarch_i386 -O3' -// RUN: not %clang -target i386-apple-darwin9 -m32 -Xarch_i386 -o -Xarch_i386 -S %s -S -Xarch_i386 -o 2>&1 | FileCheck -check-prefix=INVALID %s +// RUN: not %clang -target i386-apple-darwin11 -m32 -Xarch_i386 -o -Xarch_i386 -S %s -S -Xarch_i386 -o 2>&1 | FileCheck -check-prefix=INVALID %s // INVALID: error: invalid Xarch argument: '-Xarch_i386 -o' // 
INVALID: error: invalid Xarch argument: '-Xarch_i386 -S' // INVALID: error: invalid Xarch argument: '-Xarch_i386 -o' diff --git a/clang/test/Driver/apple-kext-mkernel.c b/clang/test/Driver/apple-kext-mkernel.c --- a/clang/test/Driver/apple-kext-mkernel.c +++ b/clang/test/Driver/apple-kext-mkernel.c @@ -1,20 +1,20 @@ -// RUN: %clang -target x86_64-apple-darwin10 -mkernel -### -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK-X86 %s -// RUN: %clang -target x86_64-apple-darwin10 -mkernel -### -fsyntax-only -fbuiltin -fno-builtin -fcommon -fno-common %s 2>&1 | FileCheck --check-prefix=CHECK-X86 %s +// RUN: %clang -target x86_64-apple-darwin11 -mkernel -### -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK-X86 %s +// RUN: %clang -target x86_64-apple-darwin11 -mkernel -### -fsyntax-only -fbuiltin -fno-builtin -fcommon -fno-common %s 2>&1 | FileCheck --check-prefix=CHECK-X86 %s // CHECK-X86: "-disable-red-zone" // CHECK-X86: "-fno-builtin" // CHECK-X86: "-fno-rtti" // CHECK-X86-NOT: "-fcommon" -// RUN: %clang -target x86_64-apple-darwin10 -mkernel -### -fsyntax-only -fbuiltin -fcommon %s 2>&1 | FileCheck --check-prefix=CHECK-X86-2 %s +// RUN: %clang -target x86_64-apple-darwin11 -mkernel -### -fsyntax-only -fbuiltin -fcommon %s 2>&1 | FileCheck --check-prefix=CHECK-X86-2 %s // CHECK-X86-2: "-disable-red-zone" // CHECK-X86-2-NOT: "-fno-builtin" // CHECK-X86-2: "-fno-rtti" // CHECK-X86-2-NOT: "-fno-common" -// RUN: %clang -target x86_64-apple-darwin10 -arch armv7 -mkernel -mstrict-align -### -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK-ARM %s -// RUN: %clang -target x86_64-apple-darwin10 -arch armv7 -mkernel -mstrict-align -### -fsyntax-only -fbuiltin -fno-builtin -fcommon -fno-common %s 2>&1 | FileCheck --check-prefix=CHECK-ARM %s +// RUN: %clang -target x86_64-apple-darwin11 -arch armv7 -mkernel -mstrict-align -### -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK-ARM %s +// RUN: %clang -target x86_64-apple-darwin11 -arch armv7 -mkernel 
-mstrict-align -### -fsyntax-only -fbuiltin -fno-builtin -fcommon -fno-common %s 2>&1 | FileCheck --check-prefix=CHECK-ARM %s // CHECK-ARM: "-target-feature" "+long-calls" // CHECK-ARM: "-target-feature" "+strict-align" @@ -23,6 +23,6 @@ // CHECK-ARM: "-fno-rtti" // CHECK-ARM-NOT: "-fcommon" -// RUN: %clang -target x86_64-apple-darwin10 \ +// RUN: %clang -target x86_64-apple-darwin11 \ // RUN: -Werror -fno-builtin -fno-exceptions -fno-common -fno-rtti \ // RUN: -mkernel -fsyntax-only %s diff --git a/clang/test/Driver/arc.c b/clang/test/Driver/arc.c --- a/clang/test/Driver/arc.c +++ b/clang/test/Driver/arc.c @@ -1,8 +1,8 @@ -// RUN: not %clang -ObjC -target i386-apple-darwin10 -m32 -fobjc-arc %s -fsyntax-only 2>&1 | FileCheck %s -// RUN: not %clang -x objective-c -target i386-apple-darwin10 -m32 -fobjc-arc %s -fsyntax-only 2>&1 | FileCheck %s -// RUN: not %clang -x objective-c++ -target i386-apple-darwin10 -m32 -fobjc-arc %s -fsyntax-only 2>&1 | FileCheck %s -// RUN: not %clang -x c -target i386-apple-darwin10 -m32 -fobjc-arc %s -fsyntax-only 2>&1 | FileCheck -check-prefix NOTOBJC %s -// RUN: not %clang -x c++ -target i386-apple-darwin10 -m32 -fobjc-arc %s -fsyntax-only 2>&1 | FileCheck -check-prefix NOTOBJC %s +// RUN: not %clang -ObjC -target i386-apple-darwin10 -stdlib=libstdc++ -m32 -fobjc-arc %s -fsyntax-only 2>&1 | FileCheck %s +// RUN: not %clang -x objective-c -target i386-apple-darwin10 -stdlib=libstdc++ -m32 -fobjc-arc %s -fsyntax-only 2>&1 | FileCheck %s +// RUN: not %clang -x objective-c++ -target i386-apple-darwin10 -stdlib=libstdc++ -m32 -fobjc-arc %s -fsyntax-only 2>&1 | FileCheck %s +// RUN: not %clang -x c -target i386-apple-darwin10 -stdlib=libstdc++ -m32 -fobjc-arc %s -fsyntax-only 2>&1 | FileCheck -check-prefix NOTOBJC %s +// RUN: not %clang -x c++ -target i386-apple-darwin10 -stdlib=libstdc++ -m32 -fobjc-arc %s -fsyntax-only 2>&1 | FileCheck -check-prefix NOTOBJC %s // RUN: not %clang -x objective-c -target x86_64-apple-darwin11 
-mmacosx-version-min=10.5 -fobjc-arc %s -fsyntax-only 2>&1 | FileCheck -check-prefix NOTSUPPORTED %s // Just to test clang is working. diff --git a/clang/test/Driver/avr-ld.c b/clang/test/Driver/avr-ld.c --- a/clang/test/Driver/avr-ld.c +++ b/clang/test/Driver/avr-ld.c @@ -1,44 +1,44 @@ -// RUN: %clang -### --target=avr -mmcu=at90s2313 --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKA %s +// RUN: %clang -### --target=avr -mmcu=at90s2313 --rtlib=libgcc --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKA %s // LINKA: {{".*ld.*"}} {{.*}} {{"-L.*tiny-stack"}} {{.*}} "-Tdata=0x800060" "--start-group" {{.*}} "-lat90s2313" {{.*}} "--end-group" "-mavr2" -// RUN: %clang -### --target=avr -mmcu=at90s8515 --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKB %s +// RUN: %clang -### --target=avr -mmcu=at90s8515 --rtlib=libgcc --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKB %s // LINKB: {{".*ld.*"}} {{.*}} "-Tdata=0x800060" "--start-group" {{.*}} "-lat90s8515" {{.*}} "--end-group" "-mavr2" -// RUN: %clang -### --target=avr -mmcu=attiny13 --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKC %s +// RUN: %clang -### --target=avr -mmcu=attiny13 --rtlib=libgcc --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKC %s // LINKC: {{".*ld.*"}} {{.*}} {{"-L.*avr25/tiny-stack"}} {{.*}} "-Tdata=0x800060" "--start-group" {{.*}} "-lattiny13" {{.*}} "--end-group" "-mavr25" -// RUN: %clang -### --target=avr -mmcu=attiny44 --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKD %s +// RUN: %clang -### --target=avr -mmcu=attiny44 --rtlib=libgcc --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKD %s // LINKD: {{".*ld.*"}} {{.*}} {{"-L.*avr25"}} {{.*}} "-Tdata=0x800060" "--start-group" {{.*}} "-lattiny44" {{.*}} "--end-group" "-mavr25" -// RUN: %clang -### --target=avr -mmcu=atmega103 --sysroot %S/Inputs/basic_avr_tree 
%s 2>&1 | FileCheck -check-prefix LINKE %s +// RUN: %clang -### --target=avr -mmcu=atmega103 --rtlib=libgcc --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKE %s // LINKE: {{".*ld.*"}} {{.*}} {{"-L.*avr31"}} {{.*}} "-Tdata=0x800060" "--start-group" {{.*}} "-latmega103" {{.*}} "--end-group" "-mavr31" -// RUN: %clang -### --target=avr -mmcu=atmega8u2 --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKF %s +// RUN: %clang -### --target=avr -mmcu=atmega8u2 --rtlib=libgcc --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKF %s // LINKF: {{".*ld.*"}} {{.*}} {{"-L.*avr35"}} {{.*}} "-Tdata=0x800100" "--start-group" {{.*}} "-latmega8u2" {{.*}} "--end-group" "-mavr35" -// RUN: %clang -### --target=avr -mmcu=atmega48pa --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKG %s +// RUN: %clang -### --target=avr -mmcu=atmega48pa --rtlib=libgcc --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKG %s // LINKG: {{".*ld.*"}} {{.*}} {{"-L.*avr4"}} {{.*}} "-Tdata=0x800100" "--start-group" {{.*}} "-latmega48pa" {{.*}} "--end-group" "-mavr4" -// RUN: %clang -### --target=avr -mmcu=atmega328 --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKH %s +// RUN: %clang -### --target=avr -mmcu=atmega328 --rtlib=libgcc --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKH %s // LINKH: {{".*ld.*"}} {{.*}} {{"-L.*avr5"}} {{.*}} "-Tdata=0x800100" "--start-group" {{.*}} "-latmega328" {{.*}} "--end-group" "-mavr5" -// RUN: %clang -### --target=avr -mmcu=atmega1281 --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKI %s +// RUN: %clang -### --target=avr -mmcu=atmega1281 --rtlib=libgcc --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKI %s // LINKI: {{".*ld.*"}} {{.*}} {{"-L.*avr51"}} {{.*}} "-Tdata=0x800200" "--start-group" {{.*}} "-latmega1281" {{.*}} "--end-group" "-mavr51" -// RUN: %clang -### 
--target=avr -mmcu=atmega2560 --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKJ %s +// RUN: %clang -### --target=avr -mmcu=atmega2560 --rtlib=libgcc --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKJ %s // LINKJ: {{".*ld.*"}} {{.*}} {{"-L.*avr6"}} {{.*}} "-Tdata=0x800200" "--start-group" {{.*}} "-latmega2560" {{.*}} "--end-group" "-mavr6" -// RUN: %clang -### --target=avr -mmcu=attiny10 --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKK %s +// RUN: %clang -### --target=avr -mmcu=attiny10 --rtlib=libgcc --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKK %s // LINKK: {{".*ld.*"}} {{.*}} {{"-L.*avrtiny"}} {{.*}} "-Tdata=0x800040" "--start-group" {{.*}} "-lattiny10" {{.*}} "--end-group" "-mavrtiny" -// RUN: %clang -### --target=avr -mmcu=atxmega16a4 --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKL %s +// RUN: %clang -### --target=avr -mmcu=atxmega16a4 --rtlib=libgcc --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKL %s // LINKL: {{".*ld.*"}} {{.*}} {{"-L.*avrxmega2"}} {{.*}} "-Tdata=0x802000" "--start-group" {{.*}} "-latxmega16a4" {{.*}} "--end-group" "-mavrxmega2" -// RUN: %clang -### --target=avr -mmcu=atxmega64b3 --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKM %s +// RUN: %clang -### --target=avr -mmcu=atxmega64b3 --rtlib=libgcc --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKM %s // LINKM: {{".*ld.*"}} {{.*}} {{"-L.*avrxmega4"}} {{.*}} "-Tdata=0x802000" "--start-group" {{.*}} "-latxmega64b3" {{.*}} "--end-group" "-mavrxmega4" -// RUN: %clang -### --target=avr -mmcu=atxmega128a3u --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKN %s +// RUN: %clang -### --target=avr -mmcu=atxmega128a3u --rtlib=libgcc --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKN %s // LINKN: {{".*ld.*"}} {{.*}} {{"-L.*avrxmega6"}} {{.*}} 
"-Tdata=0x802000" "--start-group" {{.*}} "-latxmega128a3u" {{.*}} "--end-group" "-mavrxmega6" -// RUN: %clang -### --target=avr -mmcu=atxmega128a1 --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKO %s +// RUN: %clang -### --target=avr -mmcu=atxmega128a1 --rtlib=libgcc --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKO %s // LINKO: {{".*ld.*"}} {{.*}} {{"-L.*avrxmega7"}} {{.*}} "-Tdata=0x802000" "--start-group" {{.*}} "-latxmega128a1" {{.*}} "--end-group" "-mavrxmega7" diff --git a/clang/test/Driver/avr-toolchain.c b/clang/test/Driver/avr-toolchain.c --- a/clang/test/Driver/avr-toolchain.c +++ b/clang/test/Driver/avr-toolchain.c @@ -73,7 +73,6 @@ // LDS1: "-T" "avr.lds" // LDS1-NOT: "-mavr5" -// RUN: %clang %s -### --target=avr -mmcu=atmega328 --sysroot=%S/Inputs/basic_avr_tree/ -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir 2>&1 | FileCheck --check-prefix=LIBGCC %s // RUN: %clang %s -### --target=avr -mmcu=atmega328 --sysroot=%S/Inputs/basic_avr_tree/ -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir --rtlib=libgcc 2>&1 | FileCheck --check-prefix=LIBGCC %s // LIBGCC: "-lgcc" // LIBGCC-NOT: libclang_rt diff --git a/clang/test/Driver/bindings.c b/clang/test/Driver/bindings.c --- a/clang/test/Driver/bindings.c +++ b/clang/test/Driver/bindings.c @@ -12,14 +12,14 @@ // RUN: %clang -target i386-unknown-unknown -ccc-print-bindings -fsyntax-only -x c++ %s 2>&1 | FileCheck %s --check-prefix=CHECK08 // CHECK08: "clang", inputs: ["{{.*}}bindings.c"], output: (nothing) -// RUN: %clang -target i386-apple-darwin9 -ccc-print-bindings %s -S -arch ppc 2>&1 | FileCheck %s --check-prefix=CHECK11 +// RUN: %clang -target i386-apple-darwin11 -ccc-print-bindings %s -S -arch ppc 2>&1 | FileCheck %s --check-prefix=CHECK11 // CHECK11: "clang", inputs: ["{{.*}}bindings.c"], output: "bindings.s" // RUN: %clang -target powerpc-unknown-unknown -ccc-print-bindings %s -S 2>&1 | FileCheck %s --check-prefix=CHECK12 // CHECK12: 
"clang", inputs: ["{{.*}}bindings.c"], output: "bindings.s" // Darwin bindings -// RUN: %clang -target i386-apple-darwin9 -no-integrated-as -ccc-print-bindings %s 2>&1 | FileCheck %s --check-prefix=CHECK14 +// RUN: %clang -target i386-apple-darwin11 -no-integrated-as -ccc-print-bindings %s 2>&1 | FileCheck %s --check-prefix=CHECK14 // CHECK14: "clang", inputs: ["{{.*}}bindings.c"], output: "{{.*}}.s" // CHECK14: "darwin::Assembler", inputs: ["{{.*}}.s"], output: "{{.*}}.o" // CHECK14: "darwin::Linker", inputs: ["{{.*}}.o"], output: "a.out" @@ -29,5 +29,5 @@ // CHECK15: "x86_64-unknown-linux-gnu" - "GNU::StaticLibTool", inputs: ["{{.*}}.o"], output: "a.out" // Darwin StaticLibTool binding -// RUN: %clang -target i386-apple-darwin9 -ccc-print-bindings --emit-static-lib %s 2>&1 | FileCheck %s --check-prefix=CHECK16 -// CHECK16: "i386-apple-darwin9" - "darwin::StaticLibTool", inputs: ["{{.*}}.o"], output: "a.out" +// RUN: %clang -target i386-apple-darwin11 -ccc-print-bindings --emit-static-lib %s 2>&1 | FileCheck %s --check-prefix=CHECK16 +// CHECK16: "i386-apple-darwin11" - "darwin::StaticLibTool", inputs: ["{{.*}}.o"], output: "a.out" diff --git a/clang/test/Driver/cc-log-diagnostics.c b/clang/test/Driver/cc-log-diagnostics.c --- a/clang/test/Driver/cc-log-diagnostics.c +++ b/clang/test/Driver/cc-log-diagnostics.c @@ -1,7 +1,7 @@ // RUN: rm -f %t.log // RUN: env RC_DEBUG_OPTIONS=1 \ // RUN: CC_LOG_DIAGNOSTICS=1 CC_LOG_DIAGNOSTICS_FILE=%t.log \ -// RUN: %clang -Wfoobar --target=x86_64-apple-darwin10 -fsyntax-only %s +// RUN: %clang -Wfoobar --target=x86_64-apple-darwin11 -fsyntax-only %s // RUN: FileCheck %s < %t.log int f0(void) {} diff --git a/clang/test/Driver/cpp-precomp.c b/clang/test/Driver/cpp-precomp.c --- a/clang/test/Driver/cpp-precomp.c +++ b/clang/test/Driver/cpp-precomp.c @@ -1,5 +1,5 @@ -// RUN: %clang -target x86_64-apple-darwin10 \ +// RUN: %clang -target x86_64-apple-darwin11 \ // RUN: -Werror -cpp-precomp -fsyntax-only %s -// RUN: %clang -target 
x86_64-apple-darwin10 \ +// RUN: %clang -target x86_64-apple-darwin11 \ // RUN: -Werror -no-cpp-precomp -fsyntax-only %s diff --git a/clang/test/Driver/darwin-debug-flags.c b/clang/test/Driver/darwin-debug-flags.c --- a/clang/test/Driver/darwin-debug-flags.c +++ b/clang/test/Driver/darwin-debug-flags.c @@ -1,15 +1,15 @@ -// RUN: env RC_DEBUG_OPTIONS=1 %clang -target i386-apple-darwin9 -I "path with \spaces" -g -Os %s -emit-llvm -S -o - | FileCheck %s +// RUN: env RC_DEBUG_OPTIONS=1 %clang -target i386-apple-darwin11 -I "path with \spaces" -g -Os %s -emit-llvm -S -o - | FileCheck %s // // RUN: touch %t.s -// RUN: env RC_DEBUG_OPTIONS=1 %clang -### -target i386-apple-darwin9 -c -g %t.s 2>&1 | FileCheck -check-prefix=S %s +// RUN: env RC_DEBUG_OPTIONS=1 %clang -### -target i386-apple-darwin11 -c -g %t.s 2>&1 | FileCheck -check-prefix=S %s // -// RUN: %clang -### -target i386-apple-darwin9 -c -g %t.s 2>&1 | FileCheck -check-prefix=P %s +// RUN: %clang -### -target i386-apple-darwin11 -c -g %t.s 2>&1 | FileCheck -check-prefix=P %s // CHECK: distinct !DICompileUnit( // CHECK-SAME: flags: // CHECK-SAME: -I path\\ with\\ \\\\spaces // CHECK-SAME: -g -Os -// CHECK-SAME: -mmacos-version-min=10.5.0 +// CHECK-SAME: -mmacos-version-min=10.7.0 int x; diff --git a/clang/test/Driver/darwin-dsymutil.c b/clang/test/Driver/darwin-dsymutil.c --- a/clang/test/Driver/darwin-dsymutil.c +++ b/clang/test/Driver/darwin-dsymutil.c @@ -1,6 +1,6 @@ // Check that we run dsymutil properly with multiple -arch options. 
// -// RUN: %clang -target x86_64-apple-darwin10 -ccc-print-phases \ +// RUN: %clang -target x86_64-apple-darwin11 -ccc-print-phases \ // RUN: -arch i386 -arch x86_64 %s -g 2> %t // RUN: FileCheck -check-prefix=CHECK-MULTIARCH-ACTIONS < %t %s // @@ -15,57 +15,57 @@ // CHECK-MULTIARCH-ACTIONS: 8: lipo, {6, 7}, image // CHECK-MULTIARCH-ACTIONS: 9: dsymutil, {8}, dSYM // -// RUN: %clang -target x86_64-apple-darwin10 -ccc-print-bindings \ +// RUN: %clang -target x86_64-apple-darwin11 -ccc-print-bindings \ // RUN: -arch i386 -arch x86_64 %s -g 2> %t // RUN: FileCheck -check-prefix=CHECK-MULTIARCH-BINDINGS < %t %s // -// CHECK-MULTIARCH-BINDINGS: "x86_64-apple-darwin10" - "darwin::Lipo", inputs: [{{.*}}, {{.*}}], output: "a.out" -// CHECK-MULTIARCH-BINDINGS: # "x86_64-apple-darwin10" - "darwin::Dsymutil", inputs: ["a.out"], output: "a.out.dSYM" +// CHECK-MULTIARCH-BINDINGS: "x86_64-apple-darwin11" - "darwin::Lipo", inputs: [{{.*}}, {{.*}}], output: "a.out" +// CHECK-MULTIARCH-BINDINGS: # "x86_64-apple-darwin11" - "darwin::Dsymutil", inputs: ["a.out"], output: "a.out.dSYM" // Check output name derivation. 
// -// RUN: %clang -target x86_64-apple-darwin10 -ccc-print-bindings \ +// RUN: %clang -target x86_64-apple-darwin11 -ccc-print-bindings \ // RUN: -o foo %s -g 2> %t // RUN: FileCheck -Doutfile=foo -Ddsymfile=foo.dSYM \ // RUN: -check-prefix=CHECK-OUTPUT-NAME < %t %s // -// RUN: %clang -target x86_64-apple-darwin10 -ccc-print-bindings \ +// RUN: %clang -target x86_64-apple-darwin11 -ccc-print-bindings \ // RUN: -o bar/foo %s -g 2> %t // RUN: FileCheck -Doutfile=bar/foo -Ddsymfile=bar/foo.dSYM \ // RUN: -check-prefix=CHECK-OUTPUT-NAME < %t %s // -// RUN: %clang -target x86_64-apple-darwin10 -ccc-print-bindings \ +// RUN: %clang -target x86_64-apple-darwin11 -ccc-print-bindings \ // RUN: -o bar/foo -dsym-dir external %s -g 2> %t // RUN: FileCheck -Doutfile=bar/foo -Ddsymfile=external/foo.dSYM \ // RUN: -check-prefix=CHECK-OUTPUT-NAME < %t %s // -// CHECK-OUTPUT-NAME: "x86_64-apple-darwin10" - "darwin::Linker", inputs: [{{.*}}], output: "[[outfile]]" -// CHECK-OUTPUT-NAME: "x86_64-apple-darwin10" - "darwin::Dsymutil", inputs: ["[[outfile]]"], output: "[[dsymfile]]" +// CHECK-OUTPUT-NAME: "x86_64-apple-darwin11" - "darwin::Linker", inputs: [{{.*}}], output: "[[outfile]]" +// CHECK-OUTPUT-NAME: "x86_64-apple-darwin11" - "darwin::Dsymutil", inputs: ["[[outfile]]"], output: "[[dsymfile]]" // Check output name derivation for multiple -arch options. 
// -// RUN: %clang -target x86_64-apple-darwin10 \ +// RUN: %clang -target x86_64-apple-darwin11 \ // RUN: -arch x86_64 -arch arm64 -ccc-print-bindings %s 2> %t // RUN: FileCheck --check-prefix=CHECK-MULTIARCH-OUTPUT-NAME < %t %s // -// CHECK-MULTIARCH-OUTPUT-NAME: "x86_64-apple-darwin10" - "darwin::Linker", inputs: ["{{.*}}{{/|\\}}darwin-dsymutil-x86_64.o"], output: "{{.*}}{{/|\\}}darwin-dsymutil-x86_64.out" -// CHECK-MULTIARCH-OUTPUT-NAME: "arm64-apple-darwin10" - "darwin::Linker", inputs: ["{{.*}}{{/|\\}}darwin-dsymutil-arm64.o"], output: "{{.*}}{{/|\\}}darwin-dsymutil-arm64.out" -// CHECK-MULTIARCH-OUTPUT-NAME: "arm64-apple-darwin10" - "darwin::Lipo", inputs: ["{{.*}}{{/|\\}}darwin-dsymutil-x86_64.out", "{{.*}}{{/|\\}}darwin-dsymutil-arm64.out"], output: "a.out" +// CHECK-MULTIARCH-OUTPUT-NAME: "x86_64-apple-darwin11" - "darwin::Linker", inputs: ["{{.*}}{{/|\\}}darwin-dsymutil-x86_64.o"], output: "{{.*}}{{/|\\}}darwin-dsymutil-x86_64.out" +// CHECK-MULTIARCH-OUTPUT-NAME: "arm64-apple-darwin11" - "darwin::Linker", inputs: ["{{.*}}{{/|\\}}darwin-dsymutil-arm64.o"], output: "{{.*}}{{/|\\}}darwin-dsymutil-arm64.out" +// CHECK-MULTIARCH-OUTPUT-NAME: "arm64-apple-darwin11" - "darwin::Lipo", inputs: ["{{.*}}{{/|\\}}darwin-dsymutil-x86_64.out", "{{.*}}{{/|\\}}darwin-dsymutil-arm64.out"], output: "a.out" // -// RUN: %clang -target x86_64-apple-darwin10 \ +// RUN: %clang -target x86_64-apple-darwin11 \ // RUN: -Wl,-foo -arch x86_64 -arch arm64 -ccc-print-bindings %s 2> %t // RUN: FileCheck --check-prefix=CHECK-MULTIARCH-OUTPUT-NAME-WITH-ARG < %t %s // -// CHECK-MULTIARCH-OUTPUT-NAME-WITH-ARG: "x86_64-apple-darwin10" - "darwin::Linker", inputs: [(input arg), "{{.*}}{{/|\\}}darwin-dsymutil-x86_64.o"], output: "{{.*}}{{/|\\}}darwin-dsymutil-x86_64.out" -// CHECK-MULTIARCH-OUTPUT-NAME-WITH-ARG: "arm64-apple-darwin10" - "darwin::Linker", inputs: [(input arg), "{{.*}}{{/|\\}}darwin-dsymutil-arm64.o"], output: "{{.*}}{{/|\\}}darwin-dsymutil-arm64.out" -// 
CHECK-MULTIARCH-OUTPUT-NAME-WITH-ARG: "arm64-apple-darwin10" - "darwin::Lipo", inputs: ["{{.*}}{{/|\\}}darwin-dsymutil-x86_64.out", "{{.*}}{{/|\\}}darwin-dsymutil-arm64.out"], output: "a.out" +// CHECK-MULTIARCH-OUTPUT-NAME-WITH-ARG: "x86_64-apple-darwin11" - "darwin::Linker", inputs: [(input arg), "{{.*}}{{/|\\}}darwin-dsymutil-x86_64.o"], output: "{{.*}}{{/|\\}}darwin-dsymutil-x86_64.out" +// CHECK-MULTIARCH-OUTPUT-NAME-WITH-ARG: "arm64-apple-darwin11" - "darwin::Linker", inputs: [(input arg), "{{.*}}{{/|\\}}darwin-dsymutil-arm64.o"], output: "{{.*}}{{/|\\}}darwin-dsymutil-arm64.out" +// CHECK-MULTIARCH-OUTPUT-NAME-WITH-ARG: "arm64-apple-darwin11" - "darwin::Lipo", inputs: ["{{.*}}{{/|\\}}darwin-dsymutil-x86_64.out", "{{.*}}{{/|\\}}darwin-dsymutil-arm64.out"], output: "a.out" // Check that we only use dsymutil when needed. // // RUN: touch %t.o -// RUN: %clang -target x86_64-apple-darwin10 -ccc-print-bindings \ +// RUN: %clang -target x86_64-apple-darwin11 -ccc-print-bindings \ // RUN: -o foo %t.o -g 2> %t // RUN: not grep "Dsymutil" %t // Check that we don't crash when translating arguments for dsymutil. 
-// RUN: %clang -m32 -target x86_64-apple-darwin10 -arch x86_64 -g %s -### +// RUN: %clang -m32 -target x86_64-apple-darwin11 -arch x86_64 -g %s -### diff --git a/clang/test/Driver/darwin-iphone-defaults.m b/clang/test/Driver/darwin-iphone-defaults.m --- a/clang/test/Driver/darwin-iphone-defaults.m +++ b/clang/test/Driver/darwin-iphone-defaults.m @@ -1,4 +1,4 @@ -// RUN: %clang -target i386-apple-darwin9 -miphoneos-version-min=3.0 -arch armv7 -stdlib=platform -flto -S -o - %s | FileCheck %s +// RUN: %clang -target i386-apple-darwin -miphoneos-version-min=5.0 -arch armv7 -stdlib=platform -flto -S -o - %s | FileCheck %s // CHECK: @f0() [[F0:#[0-9]+]] // CHECK: @__f0_block_invoke diff --git a/clang/test/Driver/darwin-stdlib.cpp b/clang/test/Driver/darwin-stdlib.cpp --- a/clang/test/Driver/darwin-stdlib.cpp +++ b/clang/test/Driver/darwin-stdlib.cpp @@ -1,14 +1,10 @@ -// This test will fail if CLANG_DEFAULT_CXX_STDLIB is set to anything different -// than the platform default. (see https://llvm.org/bugs/show_bug.cgi?id=30548) -// XFAIL: default-cxx-stdlib-set +// This test will fail if CLANG_DEFAULT_CXX_STDLIB is set to libstdc++. 
+// XFAIL: default-cxx-stdlib=libstdc++ -// RUN: %clang -target x86_64-apple-darwin -ccc-install-dir %S/Inputs/darwin_toolchain_tree/bin/ -arch arm64 -miphoneos-version-min=7.0 %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-LIBCXX -// RUN: %clang -target x86_64-apple-darwin -ccc-install-dir %S/Inputs/darwin_toolchain_tree/bin/ -mmacosx-version-min=10.8 -Wno-stdlibcxx-not-found %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-LIBSTDCXX -// RUN: %clang -target x86_64-apple-darwin -ccc-install-dir %S/Inputs/darwin_toolchain_tree/bin/ -mmacosx-version-min=10.9 %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-LIBCXX -// RUN: %clang -target x86_64-apple-darwin -ccc-install-dir %S/Inputs/darwin_toolchain_tree/bin/ -arch armv7s -miphoneos-version-min=6.1 -Wno-stdlibcxx-not-found %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-LIBSTDCXX -// RUN: %clang -target x86_64-apple-darwin -ccc-install-dir %S/Inputs/darwin_toolchain_tree/bin/ -arch armv7s -miphoneos-version-min=7.0 %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-LIBCXX -// RUN: %clang -target x86_64-apple-darwin -ccc-install-dir %S/Inputs/darwin_toolchain_tree/bin/ -arch armv7k %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-LIBCXX +// RUN: %clang -target x86_64-apple-darwin -ccc-install-dir %S/Inputs/darwin_toolchain_tree/bin/ -arch arm64 -miphoneos-version-min=7.0 %s -### 2>&1 | FileCheck %s +// RUN: %clang -target x86_64-apple-darwin -ccc-install-dir %S/Inputs/darwin_toolchain_tree/bin/ -mmacosx-version-min=10.9 %s -### 2>&1 | FileCheck %s +// RUN: %clang -target x86_64-apple-darwin -ccc-install-dir %S/Inputs/darwin_toolchain_tree/bin/ -arch armv7s -miphoneos-version-min=7.0 %s -### 2>&1 | FileCheck %s +// RUN: %clang -target x86_64-apple-darwin -ccc-install-dir %S/Inputs/darwin_toolchain_tree/bin/ -arch armv7k %s -### 2>&1 | FileCheck %s -// CHECK-LIBCXX: "-stdlib=libc++" -// CHECK-LIBSTDCXX-NOT: -stdlib=libc++ -// CHECK-LIBSTDCXX-NOT: -stdlib=libstdc++ +// CHECK: "-stdlib=libc++" +// CHECK-NOT: 
"-stdlib=libstdc++" diff --git a/clang/test/Driver/darwin-verify-debug.c b/clang/test/Driver/darwin-verify-debug.c --- a/clang/test/Driver/darwin-verify-debug.c +++ b/clang/test/Driver/darwin-verify-debug.c @@ -1,6 +1,6 @@ // Check that we verify debug output properly with multiple -arch options. // -// RUN: %clang -target x86_64-apple-darwin10 -ccc-print-phases \ +// RUN: %clang -target x86_64-apple-darwin11 -ccc-print-phases \ // RUN: --verify-debug-info -arch i386 -arch x86_64 %s -g 2> %t // RUN: FileCheck -check-prefix=CHECK-MULTIARCH-ACTIONS < %t %s // @@ -8,26 +8,26 @@ // CHECK-MULTIARCH-ACTIONS: 9: dsymutil, {8}, dSYM // CHECK-MULTIARCH-ACTIONS: 10: verify-debug-info, {9}, none // -// RUN: %clang -target x86_64-apple-darwin10 -ccc-print-bindings \ +// RUN: %clang -target x86_64-apple-darwin11 -ccc-print-bindings \ // RUN: --verify-debug-info -arch i386 -arch x86_64 %s -g 2> %t // RUN: FileCheck -check-prefix=CHECK-MULTIARCH-BINDINGS < %t %s // -// CHECK-MULTIARCH-BINDINGS: # "x86_64-apple-darwin10" - "darwin::Dsymutil", inputs: ["a.out"], output: "a.out.dSYM" -// CHECK-MULTIARCH-BINDINGS: # "x86_64-apple-darwin10" - "darwin::VerifyDebug", inputs: ["a.out.dSYM"], output: (nothing) +// CHECK-MULTIARCH-BINDINGS: # "x86_64-apple-darwin11" - "darwin::Dsymutil", inputs: ["a.out"], output: "a.out.dSYM" +// CHECK-MULTIARCH-BINDINGS: # "x86_64-apple-darwin11" - "darwin::VerifyDebug", inputs: ["a.out.dSYM"], output: (nothing) // Check output name derivation. 
// -// RUN: %clang -target x86_64-apple-darwin10 -ccc-print-bindings \ +// RUN: %clang -target x86_64-apple-darwin11 -ccc-print-bindings \ // RUN: --verify-debug-info -o foo %s -g 2> %t // RUN: FileCheck -check-prefix=CHECK-OUTPUT-NAME < %t %s // -// CHECK-OUTPUT-NAME: "x86_64-apple-darwin10" - "darwin::Linker", inputs: [{{.*}}], output: "foo" -// CHECK-OUTPUT-NAME: "x86_64-apple-darwin10" - "darwin::Dsymutil", inputs: ["foo"], output: "foo.dSYM" -// CHECK-OUTPUT-NAME: "x86_64-apple-darwin10" - "darwin::VerifyDebug", inputs: ["foo.dSYM"], output: (nothing) +// CHECK-OUTPUT-NAME: "x86_64-apple-darwin11" - "darwin::Linker", inputs: [{{.*}}], output: "foo" +// CHECK-OUTPUT-NAME: "x86_64-apple-darwin11" - "darwin::Dsymutil", inputs: ["foo"], output: "foo.dSYM" +// CHECK-OUTPUT-NAME: "x86_64-apple-darwin11" - "darwin::VerifyDebug", inputs: ["foo.dSYM"], output: (nothing) // Check that we only verify when needed. // // RUN: touch %t.o -// RUN: %clang -target x86_64-apple-darwin10 -ccc-print-bindings \ +// RUN: %clang -target x86_64-apple-darwin11 -ccc-print-bindings \ // RUN: --verify-debug-info -o foo %t.o -g 2> %t // RUN: not grep "Verify" %t diff --git a/clang/test/Driver/diagnostics.c b/clang/test/Driver/diagnostics.c --- a/clang/test/Driver/diagnostics.c +++ b/clang/test/Driver/diagnostics.c @@ -6,37 +6,37 @@ // diagnostics when only compiling for all targets. // This is normally a non-fatal warning: -// RUN: %clang --target=x86_64-apple-darwin10 \ +// RUN: %clang --target=x86_64-apple-darwin11 \ // RUN: -fsyntax-only -lfoo %s 2>&1 | FileCheck %s // Either with a specific -Werror=unused.. or a blanket -Werror, this // causes the command to fail. 
-// RUN: not %clang --target=x86_64-apple-darwin10 \ +// RUN: not %clang --target=x86_64-apple-darwin11 \ // RUN: -fsyntax-only -lfoo \ // RUN: -Werror=unused-command-line-argument %s 2>&1 | FileCheck %s -// RUN: not %clang --target=x86_64-apple-darwin10 \ +// RUN: not %clang --target=x86_64-apple-darwin11 \ // RUN: -fsyntax-only -lfoo -Werror %s 2>&1 | FileCheck %s // With a specific -Wno-..., no diagnostic should be printed. -// RUN: %clang --target=x86_64-apple-darwin10 \ +// RUN: %clang --target=x86_64-apple-darwin11 \ // RUN: -fsyntax-only -lfoo -Werror \ // RUN: -Wno-unused-command-line-argument %s 2>&1 | count 0 // With -Qunused-arguments, no diagnostic should be printed. -// RUN: %clang --target=x86_64-apple-darwin10 \ +// RUN: %clang --target=x86_64-apple-darwin11 \ // RUN: -fsyntax-only -lfoo -Werror \ // RUN: -Qunused-arguments %s 2>&1 | count 0 // With the argument enclosed in --{start,end}-no-unused-arguments, // there's no diagnostic. -// RUN: %clang --target=x86_64-apple-darwin10 -fsyntax-only \ +// RUN: %clang --target=x86_64-apple-darwin11 -fsyntax-only \ // RUN: --start-no-unused-arguments -lfoo --end-no-unused-arguments \ // RUN: -Werror %s 2>&1 | count 0 // With --{start,end}-no-unused-argument around a different argument, it // still warns about the unused argument. 
-// RUN: not %clang --target=x86_64-apple-darwin10 \ +// RUN: not %clang --target=x86_64-apple-darwin11 \ // RUN: --start-no-unused-arguments -fsyntax-only --end-no-unused-arguments \ // RUN: -lfoo -Werror %s 2>&1 | FileCheck %s diff --git a/clang/test/Driver/exceptions.m b/clang/test/Driver/exceptions.m --- a/clang/test/Driver/exceptions.m +++ b/clang/test/Driver/exceptions.m @@ -1,4 +1,4 @@ -// RUN: %clang -target x86_64-apple-darwin9 \ +// RUN: %clang -target x86_64-apple-darwin11 \ // RUN: -fsyntax-only -fno-exceptions %s void f1(void) { diff --git a/clang/test/Driver/redundant-args.c b/clang/test/Driver/redundant-args.c --- a/clang/test/Driver/redundant-args.c +++ b/clang/test/Driver/redundant-args.c @@ -1,2 +1,2 @@ -// RUN: %clang -target x86_64-apple-darwin10 \ +// RUN: %clang -target x86_64-apple-darwin11 \ // RUN: -Werror -x c -x c -fsyntax-only %s diff --git a/clang/test/Headers/float-darwin.c b/clang/test/Headers/float-darwin.c --- a/clang/test/Headers/float-darwin.c +++ b/clang/test/Headers/float-darwin.c @@ -1,5 +1,5 @@ // REQUIRES: system-darwin -// RUN: %clang -target x86_64-apple-darwin10 -fsyntax-only -std=c11 -isysroot %S/Inputs %s +// RUN: %clang -target x86_64-apple-darwin11 -fsyntax-only -std=c11 -isysroot %S/Inputs %s #include // Test the #include_next on float.h works on Darwin. diff --git a/clang/test/Headers/tgmath-darwin.c b/clang/test/Headers/tgmath-darwin.c --- a/clang/test/Headers/tgmath-darwin.c +++ b/clang/test/Headers/tgmath-darwin.c @@ -1,5 +1,5 @@ // REQUIRES: system-darwin -// RUN: %clang -target x86_64-apple-darwin10 -fsyntax-only -std=c11 -isysroot %S/Inputs %s +// RUN: %clang -target x86_64-apple-darwin11 -fsyntax-only -std=c11 -isysroot %S/Inputs %s #include // Test the #include_next of tgmath.h works on Darwin. 
diff --git a/clang/test/PCH/reloc.c b/clang/test/PCH/reloc.c --- a/clang/test/PCH/reloc.c +++ b/clang/test/PCH/reloc.c @@ -1,8 +1,8 @@ -// RUN: %clang -target x86_64-apple-darwin10 --relocatable-pch -o %t \ +// RUN: %clang -target x86_64-apple-darwin11 --relocatable-pch -o %t \ // RUN: -isysroot %S/Inputs/libroot %S/Inputs/libroot/usr/include/reloc.h -// RUN: %clang -target x86_64-apple-darwin10 -fsyntax-only \ +// RUN: %clang -target x86_64-apple-darwin11 -fsyntax-only \ // RUN: -include-pch %t -isysroot %S/Inputs/libroot %s -Xclang -verify -// RUN: not %clang -target x86_64-apple-darwin10 -include-pch %t %s +// RUN: not %clang -target x86_64-apple-darwin11 -include-pch %t %s // REQUIRES: x86-registered-target #include diff --git a/clang/test/SemaCXX/member-class-11.cpp b/clang/test/SemaCXX/member-class-11.cpp --- a/clang/test/SemaCXX/member-class-11.cpp +++ b/clang/test/SemaCXX/member-class-11.cpp @@ -26,4 +26,56 @@ ~B(); // expected-error {{expected the class name after '~' to name the enclosing class}} }; +template +struct D { + friend T::S::~S(); +private: + static constexpr int secret = 42; +}; + +template +struct E { + friend T::S::~V(); +}; + +struct BadInstantiation { + struct S { + struct V {}; + }; +}; + +struct GoodInstantiation { + struct V { + ~V(); + }; + using S = V; +}; + +// FIXME: We should diagnose this while instantiating. 
+E x; +E y; + +struct Q { + struct S { ~S(); }; +}; + +Q::S::~S() { + void foo(int); + foo(D::secret); +} + +struct X { + ~X(); +}; +struct Y; + +struct Z1 { + friend X::~Y(); // expected-error {{expected the class name after '~' to name the enclosing class}} +}; + +template +struct Z2 { + friend X::~Y(); // expected-error {{expected the class name after '~' to name the enclosing class}} +}; + } diff --git a/clang/test/SemaCXX/unreachable-code.cpp b/clang/test/SemaCXX/unreachable-code.cpp --- a/clang/test/SemaCXX/unreachable-code.cpp +++ b/clang/test/SemaCXX/unreachable-code.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fcxx-exceptions -fexceptions -fsyntax-only -Wunreachable-code-aggressive -fblocks -verify %s +// RUN: %clang_cc1 -std=c++17 -fcxx-exceptions -fexceptions -fsyntax-only -Wunreachable-code-aggressive -fblocks -verify %s int j; int bar(); @@ -99,3 +99,34 @@ } } + +namespace gh57123 { + bool foo() { + if constexpr (true) { + if (true) + return true; + else + return false; // expected-warning {{will never be executed}} + } + else + return false; // no-warning + } + + bool bar() { + if (true) + return true; + else + return false; // expected-warning {{will never be executed}} + } + + bool baz() { + if constexpr (true) + return true; + else { + if (true) + return true; + else + return false; // expected-warning {{will never be executed}} + } + } +} diff --git a/clang/test/SemaCXX/warn-comma-operator.cpp b/clang/test/SemaCXX/warn-comma-operator.cpp --- a/clang/test/SemaCXX/warn-comma-operator.cpp +++ b/clang/test/SemaCXX/warn-comma-operator.cpp @@ -140,6 +140,27 @@ // CHECK: fix-it:{{.*}}:{[[@LINE-8]]:46-[[@LINE-8]]:46}:")" } + +void void_func(); +int int_func() { return 0; } + +void void_function_comma(){ + void_func(), int_func(); // expected no -Wcomma because of the returning type `void` + // Reported by https://github.com/llvm/llvm-project/issues/57151 + // Descriptions about -Wcomma: https://reviews.llvm.org/D3976 +} + +typedef void Void; +Void 
typedef_func(); + +void whatever() { + // We don't get confused about type aliases. + typedef_func(), int_func(); + // Even function pointers don't confuse us. + void (*fp)() = void_func; + fp(), int_func(); +} + #ifdef __cplusplus class S2 { public: @@ -296,4 +317,23 @@ (void)T{}, 0; static_cast(T{}), 0; } + +namespace { + +// issue #57151 + +struct S { + void mem() {} +}; + +void whatever() { + struct S s; + // Member function calls also work as expected. + s.mem(), int_func(); + // As do lambda calls. + []() { return; }(), int_func(); +} + +} // namespace + #endif // ifdef __cplusplus diff --git a/clang/test/SemaTemplate/ms-unqualified-base-class.cpp b/clang/test/SemaTemplate/ms-unqualified-base-class.cpp --- a/clang/test/SemaTemplate/ms-unqualified-base-class.cpp +++ b/clang/test/SemaTemplate/ms-unqualified-base-class.cpp @@ -83,3 +83,37 @@ return I; } + +template class Vec {}; // expected-note {{template is declared here}} + +template class Index : public Vec { + // after-error@+1 {{member initializer 'Vec' does not name a non-static data member or base class}} + Index() : Vec() {} // before-warning {{unqualified base initializer of class templates is a Microsoft extension}} +}; + +template class Index<0>; + +template class Array : public Vec { + // after-error@+1 {{member initializer 'Vec' does not name a non-static data member or base class}} + Array() : Vec() {} // before-warning {{unqualified base initializer of class templates is a Microsoft extension}} +}; + +template class Array; + +template class Wrong : public Vec { + Wrong() : NonExistent() {} // expected-error {{member initializer 'NonExistent' does not name a non-static data member or base class}} +}; + +template class Wrong; + +template class Wrong2 : public Vec { + Wrong2() : Vec() {} // expected-error {{too few template arguments for class template 'Vec'}} +}; + +template class Wrong2; + +template class Wrong3 : public Vec { + Wrong3() : Base() {} // expected-error {{member initializer 'Base' 
does not name a non-static data member or base class}} +}; + +template class Wrong3; diff --git a/clang/test/lit.cfg.py b/clang/test/lit.cfg.py --- a/clang/test/lit.cfg.py +++ b/clang/test/lit.cfg.py @@ -134,7 +134,7 @@ # Set available features we allow tests to conditionalize on. # if config.clang_default_cxx_stdlib != '': - config.available_features.add('default-cxx-stdlib-set') + config.available_features.add('default-cxx-stdlib={}'.format(config.clang_default_cxx_stdlib)) # As of 2011.08, crash-recovery tests still do not pass on FreeBSD. if platform.system() not in ['FreeBSD']: diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -853,8 +853,8 @@ // `__start_` and `__stop_` symbols. bool isValidCIdentifier(StringRef S) { return !S.empty() && (isAlpha(S[0]) || S[0] == '_') && - std::all_of(S.begin() + 1, S.end(), - [](char C) { return C == '_' || isAlnum(C); }); + llvm::all_of(llvm::drop_begin(S), + [](char C) { return C == '_' || isAlnum(C); }); } Error linkBitcodeFiles(SmallVectorImpl &InputFiles, diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp --- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp +++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "clang/Driver/Driver.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Tooling/CommonOptionsParser.h" #include "clang/Tooling/DependencyScanning/DependencyScanningService.h" @@ -268,10 +269,7 @@ Modules.insert(I, {{MD.ID, InputIndex}, std::move(MD)}); } - ID.CommandLine = - FD.getCommandLine([&](const ModuleID &MID, ModuleOutputKind MOK) { - return lookupModuleOutput(MID, MOK); - }); + ID.CommandLine = FD.CommandLine; 
Inputs.push_back(std::move(ID)); } @@ -301,10 +299,7 @@ {"file-deps", toJSONSorted(MD.FileDeps)}, {"clang-module-deps", toJSONSorted(MD.ClangModuleDeps)}, {"clang-modulemap-file", MD.ClangModuleMapFile}, - {"command-line", MD.getCanonicalCommandLine( - [&](const ModuleID &MID, ModuleOutputKind MOK) { - return lookupModuleOutput(MID, MOK); - })}, + {"command-line", MD.getCanonicalCommandLine()}, }; OutModules.push_back(std::move(O)); } @@ -330,42 +325,6 @@ } private: - std::string lookupModuleOutput(const ModuleID &MID, ModuleOutputKind MOK) { - // Cache the PCM path, since it will be queried repeatedly for each module. - // The other outputs are only queried once during getCanonicalCommandLine. - auto PCMPath = PCMPaths.insert({MID, ""}); - if (PCMPath.second) - PCMPath.first->second = constructPCMPath(MID); - switch (MOK) { - case ModuleOutputKind::ModuleFile: - return PCMPath.first->second; - case ModuleOutputKind::DependencyFile: - return PCMPath.first->second + ".d"; - case ModuleOutputKind::DependencyTargets: - // Null-separate the list of targets. - return join(ModuleDepTargets, StringRef("\0", 1)); - case ModuleOutputKind::DiagnosticSerializationFile: - return PCMPath.first->second + ".diag"; - } - llvm_unreachable("Fully covered switch above!"); - } - - /// Construct a path for the explicitly built PCM. - std::string constructPCMPath(ModuleID MID) const { - auto MDIt = Modules.find(IndexedModuleID{MID, 0}); - assert(MDIt != Modules.end()); - const ModuleDeps &MD = MDIt->second; - - StringRef Filename = llvm::sys::path::filename(MD.ImplicitModulePCMPath); - StringRef ModuleCachePath = llvm::sys::path::parent_path( - llvm::sys::path::parent_path(MD.ImplicitModulePCMPath)); - - SmallString<256> ExplicitPCMPath(!ModuleFilesDir.empty() ? 
ModuleFilesDir - : ModuleCachePath); - llvm::sys::path::append(ExplicitPCMPath, MD.ID.ContextHash, Filename); - return std::string(ExplicitPCMPath); - } - struct IndexedModuleID { ModuleID ID; mutable size_t InputIndex; @@ -395,7 +354,6 @@ std::mutex Lock; std::unordered_map Modules; - std::unordered_map PCMPaths; std::vector Inputs; }; @@ -417,6 +375,42 @@ return false; } +/// Construct a path for the explicitly built PCM. +static std::string constructPCMPath(ModuleID MID, StringRef OutputDir) { + SmallString<256> ExplicitPCMPath(OutputDir); + llvm::sys::path::append(ExplicitPCMPath, MID.ContextHash, + MID.ModuleName + "-" + MID.ContextHash + ".pcm"); + return std::string(ExplicitPCMPath); +} + +static std::string lookupModuleOutput(const ModuleID &MID, ModuleOutputKind MOK, + StringRef OutputDir) { + std::string PCMPath = constructPCMPath(MID, OutputDir); + switch (MOK) { + case ModuleOutputKind::ModuleFile: + return PCMPath; + case ModuleOutputKind::DependencyFile: + return PCMPath + ".d"; + case ModuleOutputKind::DependencyTargets: + // Null-separate the list of targets. 
+ return join(ModuleDepTargets, StringRef("\0", 1)); + case ModuleOutputKind::DiagnosticSerializationFile: + return PCMPath + ".diag"; + } + llvm_unreachable("Fully covered switch above!"); +} + +static std::string getModuleCachePath(ArrayRef Args) { + for (StringRef Arg : llvm::reverse(Args)) { + Arg.consume_front("/clang:"); + if (Arg.consume_front("-fmodules-cache-path=")) + return std::string(Arg); + } + SmallString<128> Path; + driver::Driver::getDefaultModuleCachePath(Path); + return std::string(Path); +} + int main(int argc, const char **argv) { llvm::InitLLVM X(argc, argv); llvm::cl::HideUnrelatedOptions(DependencyScannerCategory); @@ -545,6 +539,14 @@ Optional MaybeModuleName; if (!ModuleName.empty()) MaybeModuleName = ModuleName; + + std::string OutputDir(ModuleFilesDir); + if (OutputDir.empty()) + OutputDir = getModuleCachePath(Input->CommandLine); + auto LookupOutput = [&](const ModuleID &MID, ModuleOutputKind MOK) { + return ::lookupModuleOutput(MID, MOK, OutputDir); + }; + // Run the tool on it. 
if (Format == ScanningOutputFormat::Make) { auto MaybeFile = WorkerTools[I]->getDependencyFile( @@ -554,7 +556,8 @@ HadErrors = true; } else { auto MaybeFullDeps = WorkerTools[I]->getFullDependencies( - Input->CommandLine, CWD, AlreadySeenModules, MaybeModuleName); + Input->CommandLine, CWD, AlreadySeenModules, LookupOutput, + MaybeModuleName); if (handleFullDependencyToolResult(Filename, MaybeFullDeps, FD, LocalIndex, DependencyOS, Errs)) HadErrors = true; diff --git a/clang/unittests/AST/DeclTest.cpp b/clang/unittests/AST/DeclTest.cpp --- a/clang/unittests/AST/DeclTest.cpp +++ b/clang/unittests/AST/DeclTest.cpp @@ -279,3 +279,35 @@ EXPECT_TRUE(f->getNonTransparentDeclContext()->isFileContext()); } + +TEST(Decl, MemberFunctionInModules) { + llvm::Annotations Code(R"( + module; + class G { + void bar() {} + }; + export module M; + class A { + void foo() {} + }; + )"); + + auto AST = + tooling::buildASTFromCodeWithArgs(Code.code(), /*Args=*/{"-std=c++20"}); + ASTContext &Ctx = AST->getASTContext(); + + auto *foo = selectFirst( + "foo", match(functionDecl(hasName("foo")).bind("foo"), Ctx)); + + // The function defined within a class definition is not implicitly inline + // if it is not attached to global module + EXPECT_FALSE(foo->isInlined()); + + auto *bar = selectFirst( + "bar", match(functionDecl(hasName("bar")).bind("bar"), Ctx)); + + // In global module, the function defined within a class definition is + // implicitly inline. 
+ EXPECT_TRUE(bar->isInlined()); +} + diff --git a/clang/unittests/Analysis/CFGBuildResult.h b/clang/unittests/Analysis/CFGBuildResult.h --- a/clang/unittests/Analysis/CFGBuildResult.h +++ b/clang/unittests/Analysis/CFGBuildResult.h @@ -56,13 +56,15 @@ TheBuildResult = BuildResult::SawFunctionBody; Options.AddImplicitDtors = true; if (std::unique_ptr Cfg = - CFG::buildCFG(nullptr, Body, Result.Context, Options)) + CFG::buildCFG(Func, Body, Result.Context, Options)) TheBuildResult = {BuildResult::BuiltCFG, Func, std::move(Cfg), std::move(AST)}; } }; -inline BuildResult BuildCFG(const char *Code, CFG::BuildOptions Options = {}) { +template +BuildResult BuildCFG(const char *Code, CFG::BuildOptions Options = {}, + FuncMatcherT FuncMatcher = ast_matchers::anything()) { std::vector Args = {"-std=c++11", "-fno-delayed-template-parsing"}; std::unique_ptr AST = tooling::buildASTFromCodeWithArgs(Code, Args); @@ -72,7 +74,8 @@ CFGCallback Callback(std::move(AST)); Callback.Options = Options; ast_matchers::MatchFinder Finder; - Finder.addMatcher(ast_matchers::functionDecl().bind("func"), &Callback); + Finder.addMatcher(ast_matchers::functionDecl(FuncMatcher).bind("func"), + &Callback); Finder.matchAST(Callback.AST->getASTContext()); return std::move(Callback.TheBuildResult); diff --git a/clang/unittests/Analysis/CFGTest.cpp b/clang/unittests/Analysis/CFGTest.cpp --- a/clang/unittests/Analysis/CFGTest.cpp +++ b/clang/unittests/Analysis/CFGTest.cpp @@ -70,6 +70,27 @@ EXPECT_EQ(BuildResult::BuiltCFG, BuildCFG(Code).getStatus()); } +// Constructing a CFG with a dependent base should not crash. 
+TEST(CFG, DependantBaseAddImplicitDtors) { + const char *Code = R"( + template + struct Base { + virtual ~Base() {} + }; + + template + struct Derived : public Base { + virtual ~Derived() {} + }; + )"; + CFG::BuildOptions Options; + Options.AddImplicitDtors = true; + Options.setAllAlwaysAdd(); + EXPECT_EQ(BuildResult::BuiltCFG, + BuildCFG(Code, Options, ast_matchers::hasName("~Derived")) + .getStatus()); +} + TEST(CFG, IsLinear) { auto expectLinear = [](bool IsLinear, const char *Code) { BuildResult B = BuildCFG(Code); diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp --- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp @@ -3902,6 +3902,36 @@ {TransferOptions{/*.ContextSensitiveOpts=*/llvm::None}}); } +TEST(TransferTest, ContextSensitiveDepthZero) { + std::string Code = R"( + bool GiveBool(); + void SetBool(bool &Var) { Var = true; } + + void target() { + bool Foo = GiveBool(); + SetBool(Foo); + // [[p]] + } + )"; + runDataflow(Code, + [](llvm::ArrayRef< + std::pair>> + Results, + ASTContext &ASTCtx) { + ASSERT_THAT(Results, ElementsAre(Pair("p", _))); + const Environment &Env = Results[0].second.Env; + + const ValueDecl *FooDecl = findValueDecl(ASTCtx, "Foo"); + ASSERT_THAT(FooDecl, NotNull()); + + auto &FooVal = + *cast(Env.getValue(*FooDecl, SkipPast::None)); + EXPECT_FALSE(Env.flowConditionImplies(FooVal)); + EXPECT_FALSE(Env.flowConditionImplies(Env.makeNot(FooVal))); + }, + {TransferOptions{ContextSensitiveOptions{/*.Depth=*/0}}}); +} + TEST(TransferTest, ContextSensitiveSetTrue) { std::string Code = R"( bool GiveBool(); @@ -4000,7 +4030,7 @@ {TransferOptions{ContextSensitiveOptions{}}}); } -TEST(TransferTest, ContextSensitiveSetTwoLayers) { +TEST(TransferTest, ContextSensitiveSetTwoLayersDepthOne) { std::string Code = R"( bool GiveBool(); void SetBool1(bool &Var) { Var = true; } @@ -4028,7 +4058,146 @@ 
EXPECT_FALSE(Env.flowConditionImplies(FooVal)); EXPECT_FALSE(Env.flowConditionImplies(Env.makeNot(FooVal))); }, - {TransferOptions{ContextSensitiveOptions{}}}); + {TransferOptions{ContextSensitiveOptions{/*.Depth=*/1}}}); +} + +TEST(TransferTest, ContextSensitiveSetTwoLayersDepthTwo) { + std::string Code = R"( + bool GiveBool(); + void SetBool1(bool &Var) { Var = true; } + void SetBool2(bool &Var) { SetBool1(Var); } + + void target() { + bool Foo = GiveBool(); + SetBool2(Foo); + // [[p]] + } + )"; + runDataflow(Code, + [](llvm::ArrayRef< + std::pair>> + Results, + ASTContext &ASTCtx) { + ASSERT_THAT(Results, ElementsAre(Pair("p", _))); + const Environment &Env = Results[0].second.Env; + + const ValueDecl *FooDecl = findValueDecl(ASTCtx, "Foo"); + ASSERT_THAT(FooDecl, NotNull()); + + auto &FooVal = + *cast(Env.getValue(*FooDecl, SkipPast::None)); + EXPECT_TRUE(Env.flowConditionImplies(FooVal)); + }, + {TransferOptions{ContextSensitiveOptions{/*.Depth=*/2}}}); +} + +TEST(TransferTest, ContextSensitiveSetThreeLayersDepthTwo) { + std::string Code = R"( + bool GiveBool(); + void SetBool1(bool &Var) { Var = true; } + void SetBool2(bool &Var) { SetBool1(Var); } + void SetBool3(bool &Var) { SetBool2(Var); } + + void target() { + bool Foo = GiveBool(); + SetBool3(Foo); + // [[p]] + } + )"; + runDataflow(Code, + [](llvm::ArrayRef< + std::pair>> + Results, + ASTContext &ASTCtx) { + ASSERT_THAT(Results, ElementsAre(Pair("p", _))); + const Environment &Env = Results[0].second.Env; + + const ValueDecl *FooDecl = findValueDecl(ASTCtx, "Foo"); + ASSERT_THAT(FooDecl, NotNull()); + + auto &FooVal = + *cast(Env.getValue(*FooDecl, SkipPast::None)); + EXPECT_FALSE(Env.flowConditionImplies(FooVal)); + EXPECT_FALSE(Env.flowConditionImplies(Env.makeNot(FooVal))); + }, + {TransferOptions{ContextSensitiveOptions{/*.Depth=*/2}}}); +} + +TEST(TransferTest, ContextSensitiveSetThreeLayersDepthThree) { + std::string Code = R"( + bool GiveBool(); + void SetBool1(bool &Var) { Var = true; } + void 
SetBool2(bool &Var) { SetBool1(Var); } + void SetBool3(bool &Var) { SetBool2(Var); } + + void target() { + bool Foo = GiveBool(); + SetBool3(Foo); + // [[p]] + } + )"; + runDataflow(Code, + [](llvm::ArrayRef< + std::pair>> + Results, + ASTContext &ASTCtx) { + ASSERT_THAT(Results, ElementsAre(Pair("p", _))); + const Environment &Env = Results[0].second.Env; + + const ValueDecl *FooDecl = findValueDecl(ASTCtx, "Foo"); + ASSERT_THAT(FooDecl, NotNull()); + + auto &FooVal = + *cast(Env.getValue(*FooDecl, SkipPast::None)); + EXPECT_TRUE(Env.flowConditionImplies(FooVal)); + }, + {TransferOptions{ContextSensitiveOptions{/*.Depth=*/3}}}); +} + +TEST(TransferTest, ContextSensitiveMutualRecursion) { + std::string Code = R"( + bool Pong(bool X, bool Y); + + bool Ping(bool X, bool Y) { + if (X) { + return Y; + } else { + return Pong(!X, Y); + } + } + + bool Pong(bool X, bool Y) { + if (Y) { + return X; + } else { + return Ping(X, !Y); + } + } + + void target() { + bool Foo = Ping(false, false); + // [[p]] + } + )"; + runDataflow(Code, + [](llvm::ArrayRef< + std::pair>> + Results, + ASTContext &ASTCtx) { + ASSERT_THAT(Results, ElementsAre(Pair("p", _))); + // The analysis doesn't crash... + const Environment &Env = Results[0].second.Env; + + const ValueDecl *FooDecl = findValueDecl(ASTCtx, "Foo"); + ASSERT_THAT(FooDecl, NotNull()); + + auto &FooVal = + *cast(Env.getValue(*FooDecl, SkipPast::None)); + // ... but it also can't prove anything here. 
+ EXPECT_FALSE(Env.flowConditionImplies(FooVal)); + EXPECT_FALSE(Env.flowConditionImplies(Env.makeNot(FooVal))); + }, + {TransferOptions{ContextSensitiveOptions{/*.Depth=*/4}}}); } TEST(TransferTest, ContextSensitiveSetMultipleLines) { diff --git a/clang/unittests/Format/FormatTestJava.cpp b/clang/unittests/Format/FormatTestJava.cpp --- a/clang/unittests/Format/FormatTestJava.cpp +++ b/clang/unittests/Format/FormatTestJava.cpp @@ -584,6 +584,17 @@ " void f() {}")); } +TEST_F(FormatTestJava, AlignDeclarations) { + FormatStyle Style = getLLVMStyle(FormatStyle::LK_Java); + Style.AlignConsecutiveDeclarations.Enabled = true; + verifyFormat("private final String[] args;\n" + "private final A_ParserHelper parserHelper;\n" + "private final int numOfCmdArgs;\n" + "private int numOfCmdArgs;\n" + "private String[] args;", + Style); +} + TEST_F(FormatTestJava, KeepsDelimitersOnOwnLineInJavaDocComments) { EXPECT_EQ("/**\n" " * javadoc line 1\n" diff --git a/clang/utils/TableGen/CMakeLists.txt b/clang/utils/TableGen/CMakeLists.txt --- a/clang/utils/TableGen/CMakeLists.txt +++ b/clang/utils/TableGen/CMakeLists.txt @@ -1,6 +1,8 @@ set(LLVM_LINK_COMPONENTS Support) -add_tablegen(clang-tblgen CLANG DESTINATION "${CLANG_TOOLS_INSTALL_DIR}" +add_tablegen(clang-tblgen CLANG + DESTINATION "${CLANG_TOOLS_INSTALL_DIR}" + EXPORT Clang ASTTableGen.cpp ClangASTNodesEmitter.cpp ClangASTPropertiesEmitter.cpp diff --git a/clang/utils/TableGen/ClangDiagnosticsEmitter.cpp b/clang/utils/TableGen/ClangDiagnosticsEmitter.cpp --- a/clang/utils/TableGen/ClangDiagnosticsEmitter.cpp +++ b/clang/utils/TableGen/ClangDiagnosticsEmitter.cpp @@ -404,17 +404,14 @@ if (!groupInPedantic(Group)) continue; - unsigned ParentsInPedantic = 0; const std::vector &Parents = DiagGroupParents.getParents(Group); - for (unsigned j = 0, ej = Parents.size(); j != ej; ++j) { - if (groupInPedantic(Parents[j])) - ++ParentsInPedantic; - } + bool AllParentsInPedantic = + llvm::all_of(Parents, [&](Record *R) { return 
groupInPedantic(R); }); // If all the parents are in -Wpedantic, this means that this diagnostic // group will be indirectly included by -Wpedantic already. In that // case, do not add it directly to -Wpedantic. If the group has no // parents, obviously it should go into -Wpedantic. - if (Parents.size() > 0 && ParentsInPedantic == Parents.size()) + if (Parents.size() > 0 && AllParentsInPedantic) continue; if (RecordVec *V = GroupsInPedantic.dyn_cast()) diff --git a/compiler-rt/CMakeLists.txt b/compiler-rt/CMakeLists.txt --- a/compiler-rt/CMakeLists.txt +++ b/compiler-rt/CMakeLists.txt @@ -68,6 +68,10 @@ "Build for a bare-metal target.") if (COMPILER_RT_STANDALONE_BUILD) + set(CMAKE_CXX_STANDARD 17 CACHE STRING "C++ standard to conform to") + set(CMAKE_CXX_STANDARD_REQUIRED YES) + set(CMAKE_CXX_EXTENSIONS NO) + load_llvm_config() if (TARGET intrinsics_gen) # Loading the llvm config causes this target to be imported so place it diff --git a/compiler-rt/lib/fuzzer/FuzzerInternal.h b/compiler-rt/lib/fuzzer/FuzzerInternal.h --- a/compiler-rt/lib/fuzzer/FuzzerInternal.h +++ b/compiler-rt/lib/fuzzer/FuzzerInternal.h @@ -91,6 +91,7 @@ void HandleMalloc(size_t Size); static void MaybeExitGracefully(); + static int InterruptExitCode(); std::string WriteToOutputCorpus(const Unit &U); private: diff --git a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp --- a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp @@ -262,6 +262,11 @@ _Exit(0); } +int Fuzzer::InterruptExitCode() { + assert(F); + return F->Options.InterruptExitCode; +} + void Fuzzer::InterruptCallback() { Printf("==%lu== libFuzzer: run interrupted; exiting\n", GetPid()); PrintFinalStats(); diff --git a/compiler-rt/lib/fuzzer/FuzzerUtilLinux.cpp b/compiler-rt/lib/fuzzer/FuzzerUtilLinux.cpp --- a/compiler-rt/lib/fuzzer/FuzzerUtilLinux.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerUtilLinux.cpp @@ -11,6 +11,7 @@ #if LIBFUZZER_LINUX || LIBFUZZER_NETBSD || 
LIBFUZZER_FREEBSD || \ LIBFUZZER_EMSCRIPTEN #include "FuzzerCommand.h" +#include "FuzzerInternal.h" #include #include @@ -25,6 +26,8 @@ int exit_code = system(CmdLine.c_str()); if (WIFEXITED(exit_code)) return WEXITSTATUS(exit_code); + if (WIFSIGNALED(exit_code) && WTERMSIG(exit_code) == SIGINT) + return Fuzzer::InterruptExitCode(); return exit_code; } diff --git a/compiler-rt/lib/msan/msan_report.cpp b/compiler-rt/lib/msan/msan_report.cpp --- a/compiler-rt/lib/msan/msan_report.cpp +++ b/compiler-rt/lib/msan/msan_report.cpp @@ -37,14 +37,14 @@ static void DescribeStackOrigin(const char *so, uptr pc) { Decorator d; Printf("%s", d.Origin()); - if (so == nullptr) { - Printf(" %sUninitialized value was created in the stack frame%s\n", - d.Origin(), d.Default()); - } else { + if (so) { Printf( " %sUninitialized value was created by an allocation of '%s%s%s'" " in the stack frame%s\n", d.Origin(), d.Name(), so, d.Origin(), d.Default()); + } else { + Printf(" %sUninitialized value was created in the stack frame%s\n", + d.Origin(), d.Default()); } if (pc) diff --git a/compiler-rt/lib/ubsan/CMakeLists.txt b/compiler-rt/lib/ubsan/CMakeLists.txt --- a/compiler-rt/lib/ubsan/CMakeLists.txt +++ b/compiler-rt/lib/ubsan/CMakeLists.txt @@ -192,7 +192,8 @@ add_compiler_rt_runtime(clang_rt.ubsan_standalone STATIC ARCHS ${UBSAN_SUPPORTED_ARCH} - SOURCES ubsan_init_standalone_preinit.cpp + SOURCES + ubsan_init_standalone_preinit.cpp ADDITIONAL_HEADERS ${UBSAN_HEADERS} OBJECT_LIBS RTSanitizerCommon RTSanitizerCommonLibc diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -355,6 +355,10 @@ if (CXX_SUPPORTS_NO_STRING_CONVERSION_FLAG) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-string-conversion") endif() +check_compiler_flag("-Wno-ctad-maybe-unsupported" CXX_SUPPORTS_NO_CTAD_MAYBE_UNSUPPORTED_FLAG) +if (CXX_SUPPORTS_NO_CTAD_MAYBE_UNSUPPORTED_FLAG) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-ctad-maybe-unsupported") +endif() 
# Add appropriate flags for GCC if (LLVM_COMPILER_IS_GCC_COMPATIBLE) diff --git a/flang/cmake/modules/AddFlang.cmake b/flang/cmake/modules/AddFlang.cmake --- a/flang/cmake/modules/AddFlang.cmake +++ b/flang/cmake/modules/AddFlang.cmake @@ -18,7 +18,7 @@ macro(add_flang_library name) cmake_parse_arguments(ARG - "SHARED;STATIC" + "SHARED;STATIC;INSTALL_WITH_TOOLCHAIN" "" "ADDITIONAL_HEADERS" ${ARGN}) @@ -65,7 +65,8 @@ if (TARGET ${name}) - if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY OR ${name} STREQUAL "libflang") + if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY OR ${name} STREQUAL "libflang" + OR ARG_INSTALL_WITH_TOOLCHAIN) get_target_export_arg(${name} Flang export_to_flangtargets UMBRELLA flang-libraries) install(TARGETS ${name} COMPONENT ${name} diff --git a/flang/docs/PolymorphicEntities.md b/flang/docs/PolymorphicEntities.md new file mode 100644 --- /dev/null +++ b/flang/docs/PolymorphicEntities.md @@ -0,0 +1,871 @@ +# Polymorphic Entities + +A polymorphic entity is a data entity that can be of different type during the +execution of a program. + +This document aims to give insights at the representation of polymorphic +entities in FIR and how polymorphic related constructs and features are lowered +to FIR. + +## Fortran standard + +Here is a list of the sections and constraints of the Fortran standard involved +for polymorphic entities. + +- 7.3.2.1 - 7.3.2.2: TYPE specifier (TYPE(*)) + - C708 + - C709 + - C710 + - C711 +- 7.3.2.3: CLASS specifier +- 7.5.4.5: The passed-object dummy argument + - C760 +- 9.7.1: ALLOCATE statement + - C933 +- 9.7.2: NULLIFY statement + - When a NULLIFY statement is applied to a polymorphic pointer (7.3.2.3), + its dynamic type becomes the same as its declared type. 
+- 10.2.2.3: Data pointer assignment +- 11.1.3: ASSOCIATE construct +- 11.1.11: SELECT TYPE construct + - C1157 + - C1158 + - C1159 + - C1160 + - C1161 + - C1162 + - C1163 + - C1164 + - C1165 +- 16.9.76 EXTENDS_TYPE_OF (A, MOLD) +- 16.9.165 SAME_TYPE_AS (A, B) +- 16.9.184 STORAGE_SIZE (A [, KIND]) +- C.10.5 Polymorphic Argument Association (15.5.2.9) + +--- + +## Representation in FIR + +### Polymorphic entities `CLASS(type1)` + +A polymorphic entity is represented as a class type in FIR. In the example below +the dummy argument `p` is passed to the subroutine `foo` as a polymorphic entity +with the extensible type `point`. The type information captured in the class is +the best statically available at compile time. +`!fir.class` is a new type introduced for polymorphic entities. It's similar to +a box type but allows the distinction between a monomorphic and a polymorphic +descriptor. +A specific `BoxTypeInterface` (TypeInterface) can be introduced to share the +same API for both types where it is necessary. `!fir.class` and `!fir.box` can +also be based on a same `BaseBoxType` similar to the `BaseMemRefType` done for +MemRef. + +**Fortran** +```fortran +type point + real :: x, y +end type point + +type, extends(point) :: point_3d + real :: z +end type + +subroutine foo(p) + class(point) :: p + ! code of the subroutine +end subroutine +``` + +**FIR** +```c +func.func @foo(%p : !fir.class>) +``` + +### Unlimited polymorphic entities `CLASS(*)` + +The unlimited polymorphic entity is represented as a class type with `*`. + +**Fortran** +```fortran +subroutine bar(x) + class(*) :: x + ! code of the subroutine +end subroutine +``` + +**FIR** +```c +func.func @bar(%x : !fir.class<*>) +``` + +### Assumed-type `TYPE(*)` + +Assumed type is added in Fortran 2018 and it is available only for dummy +arguments. It's mainly used for interfaces to non-Fortran code and is similar +to C's `void`. 
It's not part of polymorphic entities directly but it's not +currently implemented in flang. + +Assumed-type is represented as `!fir.type<*>`. + +### SELECT TYPE construct + +The `SELECT TYPE` construct select for execution at most one of its constituent +block. The selection is based on the dynamic type of the selector. + +**Fortran** +```fortran +type point + real :: x, y +end type point +type, extends(point) :: point_3d + real :: z +end type point_3d +type, extends(point) :: color_point + integer :: color +end type color_point + +type(point), target :: p +type(point_3d), target :: p3 +type(color_point), target :: c +class(point), pointer :: p_or_c +p_or_c => c +select type ( a => p_or_c ) +class is (point) + print*, a%x, a%y +type is (point_3d) + print*, a%x, a%y, a%z +class default + print*, +end select +``` + +From the Fortran standard: +> A `TYPE IS` type guard statement matches the selector if the dynamic type +and kind type parameter values of the selector are the same as those specified +by the statement. A `CLASS IS` type guard statement matches the selector if the +dynamic type of the selector is an extension of the type specified by the +statement and the kind type parameter values specified by the statement are the +same as the corresponding type parameter values of the dynamic type of the +selector. + +In the example above the `CLASS IS` type guard is matched. + +The construct is lowered to a specific FIR operation `fir.select_type`. It is +similar to other FIR "select" operations such as `fir.select` and +`fir.select_rank`. The dynamic type of the selector value is matched against a +list of type descriptor. The `TYPE IS` type guard statement is represented by a +`#fir.type_is` attribute and the `CLASS IS` type guard statement is represented +by a `#fir.class_is` attribute. +The `CLASS DEFAULT` type guard statement is represented by a `unit` attribute. 
+ +**FIR** +``` +fir.select_type %p : !fir.class> [ + #fir.class_is>, ^bb1, + #fir.type_is>, ^bb2, + unit, ^bb3] +``` + +Lowering of the `fir.select_type` operation will produce an if-then-else ladder. +The testing of the dynamic type of the selector is done by calling runtime +functions. + +The runtime has two functions to compare dynamic types. Note that these two +functions _ignore_ the values of `KIND` type parameters. A version of these +functions that does not _ignore_ the value of the `KIND` type parameters will +be implemented for the `SELECT TYPE` type guards testing. + +Currently available functions for the `EXTENDS_TYPE_OF` and `SAME_TYPE_AS` +intrinsics (`flang/include/flang/Evaluate/type.h`). +```cpp +std::optional ExtendsTypeOf(const DynamicType &) const; +std::optional SameTypeAs(const DynamicType &) const; +``` + +**FIR** (lower level FIR/MLIR after conversion to an if-then-else ladder) +``` +module { + func @f(%arg0: !fir.class<*>) -> i32 { + %c4_i32 = arith.constant 4 : i32 + %c8_i32 = arith.constant 8 : i32 + %c16_i32 = arith.constant 16 : i32 + %0 = fir.gentypedesc !fir.tdesc>> + %1 = fir.convert %arg0 : (!fir.class>) -> !fir.box + %2 = fir.convert %0 : (!fir.tdesc>>) -> !fir.ref + %3 = fir.call @ExtendsTypeOfWithKind(%1, %2) : (!fir.box, !fir.ref) -> i1 + cond_br %3, ^bb2(%c4_i32 : i32), ^bb1 + ^bb1: // pred: ^bb0 + %4 = fir.gentypedesc !fir.type<_QTpoint_3d{x:f32,y:f32,z:f32}> + %5 = fir.convert %arg0 : (!fir.class>) -> !fir.box + %6 = fir.convert %4 : (!fir.tdesc>) -> !fir.ref + %7 = fir.call @SameTypeAsWithKind(%5, %6) : (!fir.box, !fir.ref) -> i1 + cond_br %7, ^bb4(%c16_i32 : i32), ^bb3 + ^bb2(%8: i32): // pred: ^bb0 + return %8 : i32 + ^bb3: // pred: ^bb1 + br ^bb5(%c8_i32 : i32) + ^bb4(%9: i32): // pred: ^bb1 + %10 = arith.addi %9, %9 : i32 + return %10 : i32 + ^bb5(%11: i32): // pred: ^bb3 + %12 = arith.muli %11, %11 : i32 + return %12 : i32 + } + func private @ExactSameTypeAsWithKind(!fir.box, !fir.ref) -> i1 + func private 
@SameTypeAsWithKind(!fir.box, !fir.ref) -> i1 +} +``` + +Note: some dynamic type checks can be inlined for performance. Type check with +intrinsic types when dealing with unlimited polymorphic entities is an ideal +candidate for inlined checks. + +--- + +## Dynamic dispatch + +Dynamic dispatch is the process of selecting which implementation of a +polymorphic procedure to call at runtime. The runtime already has information +to be used in this process (more information can be found here: +[RuntimeTypeInfo.md](RuntimeTypeInfo.md)). + +The declaration of the data structures are present in +`flang/runtime/type-info.h`. + +In the example below, there is a basic type `shape` with two type extensions +`triangle` and `rectangle`. +The two type extensions override the `get_area` type-bound procedure. + +**UML** +``` + + |---------------------| + | Shape | + |---------------------| + | + color:integer | + | + isFilled:logical | + |---------------------| + | + init() | + | + get_area():real | + |---------------------| + /\ + /__\ + | + |---------------------------------------------------| + | | + | | +|---------------------| |---------------------| +| triangle | | rectangle | +|---------------------| |---------------------| +| + base:real | | + length:real | +| + height:real | | + width:real | +|---------------------| |---------------------| +| + get_area():real | | + get_area():real | +|---------------------| |---------------------| + +``` + +**Fortran** +```fortran +module geometry +type :: shape + integer :: color + logical :: isFilled +contains + procedure :: get_area => get_area_shape + procedure :: init => init_shape +end type shape + +type, extends(shape) :: triangle + real :: base + real :: height +contains + procedure :: get_area => get_area_triangle +end type triangle + +type, extends(shape) :: rectangle + real :: length + real :: width +contains + procedure :: get_area => get_area_rectangle +end type rectangle + +type shape_array + class(shape), allocatable :: 
item +end type + +contains + +function get_area_shape(this) + real :: get_area_shape + class(shape) :: this + get_area_shape = 0.0 +end function + +subroutine init_shape(this, color) + class(shape) :: this + integer :: color + this%color = color + this%isFilled = .false. +end subroutine + +function get_area_triangle(this) + real :: get_area_triangle + class(triangle) :: this + get_area_triangle = (this%base * this%height) / 2 +end function + +function get_area_rectangle(this) + real :: get_area_rectangle + class(rectangle) :: this + get_area_rectangle = this%length * this%width +end function + +function get_all_area(shapes) + real :: get_all_area + type(shape_array) :: shapes(:) + real :: sum + integer :: i + + get_all_area = 0.0 + + do i = 1, size(shapes) + get_all_area = get_all_area + shapes(i)%item%get_area() + end do +end function + +subroutine set_base_values(sh, v1, v2) + class(shape) :: sh + real, intent(in) :: v1, v2 + + select type (sh) + type is (triangle) + sh%base = v1 + sh%height = v2 + type is (rectangle) + sh%length = v1 + sh%width = v2 + class default + print*,'Cannot set values' + end select +end subroutine + +end module + +program foo + use geometry + + real :: area + + type(shape_array), dimension(2) :: shapes + + allocate (triangle::shapes(1)%item) + allocate (rectangle::shapes(2)%item) + + do i = 1, size(shapes) + call shapes(i)%item%init(i) + end do + + call set_base_values(shapes(1)%item, 2.0, 1.5) + call set_base_values(shapes(2)%item, 5.0, 4.5) + + area = get_all_area(shapes) + + print*, area + + deallocate(shapes(1)%item) + deallocate(shapes(2)%item) +end program +``` + +The `fir.dispatch` operation is used to perform a dynamic dispatch. This +operation is comparable to the `fir.call` operation but for polymorphic +entities. +Call to `NON_OVERRIDABLE` type-bound procedure are resolved at compile time and +a `fir.call` operation is emitted instead of a `fir.dispatch`. 
+When the type of a polymorphic entity can be fully determined at compile +time, a `fir.dispatch` op can even be converted to a `fir.call` op. This will +be discussed in more detailed later in the document in the devirtualization +section. + +**FIR** +Here is simple example of the `fir.dispatch` operation. The operation specify +the binding name of the type-bound procedure to be called and pass the +descriptor as argument. If the `NOPASS` attribute is set then the descriptor is +not passed as argument when lowered. If `PASS(arg-name)` is specified, the +`fir.pass` attribute is added to point to the PASS argument in the +`fir.dispatch` operation. `fir.nopass` attribute is added for the `NOPASS`. The +descriptor still need to be present in the `fir.dispatch` operation for the +dynamic dispatch. The CodeGen will then omit the descriptor in the argument +of the generated call. + +The dispatch explanation focus only on the call to `get_area()` as seen in the +example. + +**Fortran** +```fortran +get_all_area = get_all_area + shapes(i)%item%get_area() +``` + +**FIR** +```c +%1 = fir.convert %0 : (!fir.ref,base:f32,height:f32>>>) -> !fir.ref> +%2 = fir.dispatch "get_area"(%1) : (!fir.ref>) -> f32 +``` + +The type information is stored in the `f18Addendum` of the descriptor. The +format is defined in `flang/runtime/type-info.h` and part of its representation +in LLVM IR is shown below. The binding is comparable to a vtable. Each derived +type has a complete type-bound procedure table in which all of the bindings of +its ancestor types appear first. + +**LLVMIR** + +Representation of the derived type information with the bindings. 
+```c +%_QM__fortran_type_infoTderivedtype = type { { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, { ptr, i64, i32, i8, i8, i8, i8 }, i64, { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, i32, i8, i8, i8, i8, [4 x i8] } +%_QM__fortran_type_infoTbinding = type { %_QM__fortran_builtinsT__builtin_c_funptr, { ptr, i64, i32, i8, i8, i8, i8 } } +%_QM__fortran_builtinsT__builtin_c_funptr = type { i64 } +``` + +The `fir.dispatch` is then lowered to use the runtime information to extract the +correct function from the vtable and to perform the actual call. Here is +what it can look like in pseudo LLVM IR code. + +**LLVMIR** +```c +// Retrieve the bindings (vtable) from the type information from the descriptor +%1 = call %_QM__fortran_type_infoTbinding* @_FortranAGetBindings(%desc) +// Retrieve the position of the specific bindings in the table +%2 = call i32 @_FortranAGetBindingOffset(%1, "get_area") +// Get the binding from the table +%3 = getelementptr %_QM__fortran_type_infoTbinding, %_QM__fortran_type_infoTbinding* %1, i32 0, i32 %2 +// Get the function pointer from the binding +%4 = getelementptr %_QM__fortran_builtinsT__builtin_c_funptr, %_QM__fortran_type_infoTbinding %3, i32 0, i32 0 +// Cast func pointer +%5 = inttoptr i64 %4 to +// Load the function +%6 = load f32(%_QMgeometryTshape*)*, %5 +// Perform the actual function call +%7 = call f32 %6(%_QMgeometryTshape* %shape) +``` + +_Note:_ functions `@_FortranAGetBindings` and `@_FortranAGetBindingOffset` are +not available in the runtime and will need to be implemented. + +- `@_FortranAGetBindings` retrieves the bindings from the descriptor. 
The + descriptor holds the type information that holds the bindings. +- `@_FortranAGetBindingOffset` retrieves the procedure offset in the bindings + based on the binding name provided. + +Retrieving the binding table and the offset are done separately so multiple +dynamic dispatch on the same polymorphic entities can be optimized (the binding +table is retrieved only once for multiple call). + +### Passing polymorphic entities as argument + +**Fortran** +```fortran +TYPE t1 +END TYPE +TYPE, EXTENDS(t1) :: t2 +END TYPE +``` + +1) Dummy argument is fixed type and actual argument is fixed type. + - `TYPE(t1)` to `TYPE(t1)`: Nothing special to take into consideration. +2) Dummy argument is polymorphic and actual argument is fixed type. In these + cases, the actual argument need to be boxed to be passed to the + subroutine/function since those are expecting a descriptor. + ```c + func.func @_QMmod1Ps(%arg0: !fir.class>) + func.func @_QQmain() { + %0 = fir.alloca !fir.type<_QMmod1Tshape{x:i32,y:i32}> {uniq_name = "_QFEsh"} + %1 = fir.embox %0 : (!fir.ref>) -> !fir.class> + fir.call @_QMmod1Ps(%1) : (!fir.class>) -> () + return + } + ``` + - `TYPE(t1)` to `CLASS(t1)` + - `TYPE(t2)` to `CLASS(t1)` + - `TYPE(t1)` to `CLASS(t2)` - Invalid + - `TYPE(t2)` to `CLASS(t2)` +3) Actual argument is polymorphic and dummy argument is fixed type. These case + are restricted to the declared type of the polymorphic entities. + - The simple case is when the actual argument is a scalar + polymorphic entity passed to a non-PDT. The caller just extract the + base address from the descriptor and pass it to the function. + - In other cases, the caller needs to perform a copyin/copyout since it + cannot just extract the base address of the `CLASS(T)` because it is + likely not contiguous. + - `CLASS(t1)` to `TYPE(t1)` + - `CLASS(t2)` to `TYPE(t1)` - Invalid + - `CLASS(t1)` to `TYPE(t2)` - Invalid + - `CLASS(t2)` to `TYPE(t2)` +4) Both actual and dummy arguments are polymorphic. 
These particular cases are + straight forward. The function expect polymorphic entities already. + The boxed type is passed without change. + - `CLASS(t1)` to `CLASS(t1)` + - `CLASS(t2)` to `CLASS(t1)` + - `CLASS(t1)` to `CLASS(t2)` - Invalid + - `CLASS(t2)` to `CLASS(t2)` + +### User-Defined Derived Type Input/Output + +User-Defined Derived Type Input/Output allows to define how a derived-type +is read or written from/to a file. + +There are 4 basic subroutines that can be defined: +- Formatted READ +- Formatted WRITE +- Unformatted READ +- Unformatted WRITE + +Here are their respective interfaces: + +**Fortran** +```fortran +subroutine read_formatted(dtv, unit, iotype, v_list, iostat, iomsg) +subroutine write_formatted(dtv, unit, iotype, v_list, iostat, iomsg) +subroutine read_unformatted(dtv, unit, iotype, v_list, iostat, iomsg) +subroutine write_unformatted(dtv, unit, iotype, v_list, iostat, iomsg) +``` + +When defined on a derived-type, these specific type-bound procedures are stored +as special bindings in the type descriptor (see `SpecialBinding` in +`flang/runtime/type-info.h`). + +With a derived-type the function call to `@_FortranAioOutputDescriptor` from IO +runtime will be emitted in lowering. + +**Fortran** +```fortran +type(t) :: x +write(10), x +``` + +**FIR** +```c +%5 = fir.call @_FortranAioBeginUnformattedOutput(%c10_i32, %4, %c56_i32) : (i32, !fir.ref, i32) -> !fir.ref +%6 = fir.embox %2 : (!fir.ref>) -> !fir.class> +%7 = fir.convert %6 : (!fir.class>) -> !fir.box +%8 = fir.call @_FortranAioOutputDescriptor(%5, %7) : (!fir.ref, !fir.box) -> i1 +%9 = fir.call @_FortranAioEndIoStatement(%5) : (!fir.ref) -> i32 +``` + +When dealing with polymorphic entities the call to IO runtime can stay +unchanged. The runtime function `OutputDescriptor` can make the dynamic dispatch +to the correct binding stored in the descriptor. + +### Finalization + +The `FINAL` specifies a final subroutine that might be executed when a data +entity of that type is finalized. 
Section 7.5.6.3 defines when finalization +occurs. + +Final subroutines like User-Defined Derived Type Input/Output are stored as +special bindings in the type descriptor. The runtime is able to handle the +finalization with a call to the `@_FortranADestroy` function +(`flang/include/flang/Runtime/derived-api.h`). + +**FIR** +```c +%5 = fir.call @_FortranADestroy(%desc) : (!fir.box) -> none +``` + +The `@_FortranADestroy` function will take care to call the final subroutines +and the ones from the parent type. + +Appropriate calls to finalization have to be lowered at the right places (7.5.6.3 +When finalization occurs). + +### Devirtualization + +Sometimes there is enough information at compile-time to avoid going through +a dynamic dispatch for a type-bound procedure call on a polymorphic entity. To +be able to perform this optimization directly in FIR the dispatch table is also +present statically with the `fir.dispatch_table` and `fir.dt_entry` operations. + +Here is an example of these operations representing the dispatch tables for the +same example as for the dynamic dispatch. + +**FIR** +``` +fir.dispatch_table @_QMgeometryE.dt.shape { + fir.dt_entry init, @_QMgeometryPinit_shape + fir.dt_entry get_area, @_QMgeometryPget_area_shape +} + +fir.dispatch_table @_QMgeometryE.dt.rectangle { + fir.dt_entry init, @_QMgeometryPinit_shape + fir.dt_entry get_area, @_QMgeometryPget_area_rectangle +} + +fir.dispatch_table @_QMgeometryE.dt.triangle { + fir.dt_entry init, @_QMgeometryPinit_shape + fir.dt_entry get_area, @_QMgeometryPget_area_triangle +} +``` + +With this information, an optimization pass can replace `fir.dispatch` +operations with `fir.call` operations to the correct functions when the type is +known at compile time. + +This is the case in a `type is` type-guard block as illustrated below. 
+ +**Fortran** +```fortran +subroutine get_only_triangle_area(sh) + class(shape) :: sh + real :: area + + select type (sh) + type is (triangle) + area = sh%get_area() + class default + area = 0.0 + end select + +end subroutine +``` + +**FIR** + +The call to `get_area` in the `type is (triangle)` guard can be replaced. +```c +%3 = fir.dispatch "get_area"(%desc) +// Replaced by +%3 = fir.call @get_area_triangle(%desc) +``` + +Another example would be the one below. In this case as well, a dynamic dispatch +is not necessary and a `fir.call` can be emitted instead. + +**Fortran** +```fortran +real :: area +class(shape), pointer :: sh +type(triangle), target :: tr + +sh => tr + +area = sh%get_area() +``` + +Note that the frontend is already replacing some of the dynamic dispatch calls +with the correct static ones. The optimization pass is useful for cases not +handled by the frontend and especially cases showing up after some other +optimizations are applied. + +### `ALLOCATE`/`DEALLOCATE` statements + +The allocation and deallocation of polymorphic entities are delegated to the +runtime. +The corresponding function signatures can be found in +`flang/include/flang/Runtime/allocatable.h` and in +`flang/include/flang/Runtime/pointer.h` for pointer allocation. + +`ALLOCATE` + +The `ALLOCATE` statement is lowered to runtime calls as shown in the example +below. 
+ +**Fortran** +```fortran +allocate(triangle::shapes(1)%item) +allocate(rectangle::shapes(2)%item) +``` + +**FIR** +```c +%0 = fir.alloca !fir.class,base:f32,height:f32>> +%1 = fir.alloca !fir.class,base:f32,height:f32}>> +%3 = fir.convert %0 : (!fir.ref,base:f32,height:f32>>>) -> !fir.ref> +%4 = fir.gentypedesc !fir.type<_QMgeometryTtriangle{color:i32,isFilled:!fir.logical<4>,base:f32,height:f32}>> +%5 = fir.call @_FortranAAllocatableInitDerived(%3, %4) + +%6 = fir.convert %1 : (!fir.ref,base:f32,height:f32}>>>) -> !fir.ref> +%7 = fir.gentypedesc !fir.type<_QMgeometryTtriangle{color:i32,isFilled:!fir.logical<4>,base:f32,height:f32}>> %8 = fir.call @_FortranAAllocatableInitDerived(%6, %7) +``` + +For pointer allocation, the `PointerAllocate` function is used. + +`DEALLOCATE` + +The `DEALLOCATE` statement is lowered to a runtime call to +`AllocatableDeallocate` and `PointerDeallocate` for pointers. + +**Fortran** +```fortran +deallocate(shapes(1)%item) +deallocate(shapes(2)%item) +``` + +**FIR** +```c +%8 = fir.call @_FortranAAllocatableDeallocate(%desc1) +%9 = fir.call @_FortranAAllocatableDeallocate(%desc2) +``` + +### `EXTENDS_TYPE_OF`/`SAME_TYPE_AS` intrinsics + +`EXTENDS_TYPE_OF` and `SAME_TYPE_AS` intrinsics have implementations in the +runtime: respectively `ExtendsTypeOf` and `SameTypeAs` in +`flang/include/flang/Evaluate/type.h`. + +Both intrinsic functions are lowered to their respective runtime calls. + +### Assignment / Pointer assignment + +Intrinsic assignment of an object to another is already implemented in the +runtime. The function `@_FortranAAssign` performs the correct operations. + +Available in `flang/include/flang/Runtime/assign.h`. 
+ +### User defined assignment and operator + +**Fortran** +```fortran +module mod1 +type t1 +contains + procedure :: assign_t1 + generic :: assignment(=) => assign_t1 +end type t1 + +type, extends(t1) :: t2 +end type + +contains + +subroutine assign_t1(to, from) + class(t1), intent(inout) :: to + class(t1), intent(in) :: from + ! Custom code for the assignment +end subroutine + +subroutine assign_t2(to, from) + class(t2), intent(inout) :: to + class(t2), intent(in) :: from + ! Custom code for the assignment +end subroutine + +end module + +program main +use mod + +class(t1), allocatable :: v1 +class(t1), allocatable :: v2 + +allocate(t2::v1) +allocate(t2::v2) + +v2 = v1 + +end program +``` + +In the example above the assignment `v2 = v1` is done by a call to `assign_t1`. +This is resolved at compile time since `t2` could not have a generic type-bound +procedure for assignment with an interface that is not distinguishable. This +is the same for user defined operators. + +### `NULLIFY` + +When a `NULLIFY` statement is applied to a polymorphic pointer (7.3.2.3), its +dynamic type becomes the same as its declared type. + +The `NULLIFY` statement is lowered to a call to the corresponding runtime +function `PointerNullifyDerived` in `flang/include/flang/Runtime/pointer.h`. + +### Impact on existing FIR operations dealing with descriptors + +Currently, FIR has a couple of operations taking descriptors as inputs or +producing descriptors as outputs. These operations might need to deal with the +dynamic type of polymorphic entities. + +- `fir.load`/`fir.store` + - Currently a `fir.load` of a `fir.box` is a special case. In the code + generation no copy is made. This could be problematic with polymorphic + entities. When a `fir.load` is performed on a `fir.class` type, the dynamic + can be copied. + + **Fortran** + ```fortran + module mod1 + class(shape), pointer :: a + contains + subroutine sub1(a, b) + class(shape) :: b + associate (b => a) + ! 
Some more code + end associate + end subroutine + end module + ``` + + In the example above, the dynamic type of `a` and `b` might be different. The + dynamic type of `a` must be copied when it is associated on `b`. + + **FIR** + ```c + // fir.load must copy the dynamic type from the pointer `a` + %0 = fir.address_of(@_QMmod1Ea) : !fir.ref>>> + %1 = fir.load %0 : !fir.ref>>> + ``` + +- `fir.embox` + - The embox operation is used to create a descriptor from a reference. With + polymorphic entities, it is used to create a polymorphic descriptor from + a derived type. The declared type of the descriptor and the derived type + are identical. The dynamic type of the descriptor must be set when it is + created. This is already handled by lowering. + +- `fir.rebox` + - The rebox operation is used to create a new descriptor from another + descriptor with new optional dimension. If the original descriptor is a + polymorphic entity, its dynamic type must be propagated to the new + descriptor. + ``` + %0 = fir.slice %c10, %c33, %c2 : (index, index, index) -> !fir.slice<1> + %1 = fir.shift %c0 : (index) -> !fir.shift<1> + %2 = fir.rebox %x(%1)[%0] : (!fir.class>>, !fir.shift<1>, !fir.slice<1>) -> !fir.class>> + ``` +--- + +# Testing + +- Lowering part is tested with LIT tests in tree +- Polymorphic entities involve a lot of runtime information so executable + tests will be useful for full testing. 
+ +--- + +# Current TODOs +Current list of TODOs in lowering: +- `flang/lib/Lower/Allocatable.cpp:465` not yet implemented: SOURCE allocation +- `flang/lib/Lower/Allocatable.cpp:468` not yet implemented: MOLD allocation +- `flang/lib/Lower/Allocatable.cpp:471` not yet implemented: polymorphic entity allocation +- `flang/lib/Lower/Bridge.cpp:448` not yet implemented: create polymorphic host associated copy +- `flang/lib/Lower/Bridge.cpp:2185` not yet implemented: assignment to polymorphic allocatable +- `flang/lib/Lower/Bridge.cpp:2288` not yet implemented: pointer assignment involving polymorphic entity +- `flang/lib/Lower/Bridge.cpp:2316` not yet implemented: pointer assignment involving polymorphic entity +- `flang/lib/Lower/CallInterface.cpp:795` not yet implemented: support for polymorphic types +- `flang/lib/Lower/ConvertType.cpp:237` not yet implemented: support for polymorphic types + +Current list of TODOs in code generation: + +- `flang/lib/Optimizer/CodeGen/CodeGen.cpp:897` not yet implemented: fir.dispatch codegen +- `flang/lib/Optimizer/CodeGen/CodeGen.cpp:911` not yet implemented: fir.dispatch_table codegen +- `flang/lib/Optimizer/CodeGen/CodeGen.cpp:924` not yet implemented: fir.dt_entry codegen +- `flang/lib/Optimizer/CodeGen/CodeGen.cpp:2651` not yet implemented: fir.gentypedesc codegen + +--- + +Resources: +- [1] https://www.pgroup.com/blogs/posts/f03-oop-part1.htm +- [2] https://www.pgroup.com/blogs/posts/f03-oop-part2.htm +- [3] https://www.pgroup.com/blogs/posts/f03-oop-part3.htm +- [4] https://www.pgroup.com/blogs/posts/f03-oop-part4.htm +- [5] Modern Fortran explained diff --git a/flang/lib/Decimal/CMakeLists.txt b/flang/lib/Decimal/CMakeLists.txt --- a/flang/lib/Decimal/CMakeLists.txt +++ b/flang/lib/Decimal/CMakeLists.txt @@ -1,5 +1,5 @@ -add_flang_library(FortranDecimal +add_flang_library(FortranDecimal INSTALL_WITH_TOOLCHAIN binary-to-decimal.cpp decimal-to-binary.cpp ) diff --git a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp 
b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp --- a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp +++ b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp @@ -31,11 +31,14 @@ #include "flang/Optimizer/Support/FIRContext.h" #include "flang/Optimizer/Transforms/Passes.h" #include "mlir/IR/Matchers.h" +#include "mlir/IR/TypeUtilities.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "mlir/Transforms/RegionUtils.h" +#include "llvm/ADT/Optional.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #define DEBUG_TYPE "flang-simplify-intrinsics" @@ -159,8 +162,13 @@ /// with signature provided by \p funcOp. The caller is responsible /// for saving/restoring the original insertion point of \p builder. /// \p funcOp is expected to be empty on entry to this function. +/// \p arg1ElementTy and \p arg2ElementTy specify elements types +/// of the underlying array objects - they are used to generate proper +/// element accesses. static void genFortranADotBody(fir::FirOpBuilder &builder, - mlir::func::FuncOp &funcOp) { + mlir::func::FuncOp &funcOp, + mlir::Type arg1ElementTy, + mlir::Type arg2ElementTy) { // function FortranADotProduct_simplified(arr1, arr2) // T, dimension(:) :: arr1, arr2 // T product = 0 @@ -171,14 +179,15 @@ // FortranADotProduct_simplified = product // end function FortranADotProduct_simplified auto loc = mlir::UnknownLoc::get(builder.getContext()); - mlir::Type elementType = funcOp.getResultTypes()[0]; + mlir::Type resultElementType = funcOp.getResultTypes()[0]; builder.setInsertionPointToEnd(funcOp.addEntryBlock()); mlir::IndexType idxTy = builder.getIndexType(); - mlir::Value zero = elementType.isa() - ? builder.createRealConstant(loc, elementType, 0.0) - : builder.createIntegerConstant(loc, elementType, 0); + mlir::Value zero = + resultElementType.isa() + ? 
builder.createRealConstant(loc, resultElementType, 0.0) + : builder.createIntegerConstant(loc, resultElementType, 0); mlir::Block::BlockArgListType args = funcOp.front().getArguments(); mlir::Value arg1 = args[0]; @@ -187,10 +196,12 @@ mlir::Value zeroIdx = builder.createIntegerConstant(loc, idxTy, 0); fir::SequenceType::Shape flatShape = {fir::SequenceType::getUnknownExtent()}; - mlir::Type arrTy = fir::SequenceType::get(flatShape, elementType); - mlir::Type boxArrTy = fir::BoxType::get(arrTy); - mlir::Value array1 = builder.create(loc, boxArrTy, arg1); - mlir::Value array2 = builder.create(loc, boxArrTy, arg2); + mlir::Type arrTy1 = fir::SequenceType::get(flatShape, arg1ElementTy); + mlir::Type boxArrTy1 = fir::BoxType::get(arrTy1); + mlir::Value array1 = builder.create(loc, boxArrTy1, arg1); + mlir::Type arrTy2 = fir::SequenceType::get(flatShape, arg2ElementTy); + mlir::Type boxArrTy2 = fir::BoxType::get(arrTy2); + mlir::Value array2 = builder.create(loc, boxArrTy2, arg2); // This version takes the loop trip count from the first argument. // If the first argument's box has unknown (at compilation time) // extent, then it may be better to take the extent from the second @@ -216,19 +227,25 @@ mlir::OpBuilder::InsertPoint loopEndPt = builder.saveInsertionPoint(); builder.setInsertionPointToStart(loop.getBody()); - mlir::Type eleRefTy = builder.getRefType(elementType); + mlir::Type eleRef1Ty = builder.getRefType(arg1ElementTy); mlir::Value index = loop.getInductionVar(); mlir::Value addr1 = - builder.create(loc, eleRefTy, array1, index); + builder.create(loc, eleRef1Ty, array1, index); mlir::Value elem1 = builder.create(loc, addr1); + // Convert to the result type. 
+ elem1 = builder.create(loc, resultElementType, elem1); + + mlir::Type eleRef2Ty = builder.getRefType(arg2ElementTy); mlir::Value addr2 = - builder.create(loc, eleRefTy, array2, index); + builder.create(loc, eleRef2Ty, array2, index); mlir::Value elem2 = builder.create(loc, addr2); + // Convert to the result type. + elem2 = builder.create(loc, resultElementType, elem2); - if (elementType.isa()) + if (resultElementType.isa()) sumVal = builder.create( loc, builder.create(loc, elem1, elem2), sumVal); - else if (elementType.isa()) + else if (resultElementType.isa()) sumVal = builder.create( loc, builder.create(loc, elem1, elem2), sumVal); else @@ -317,6 +334,29 @@ return 0; } +/// Given the call operation's box argument \p val, discover +/// the element type of the underlying array object. +/// \returns the element type or llvm::None if the type cannot +/// be reliably found. +/// We expect that the argument is a result of fir.convert +/// with the destination type of !fir.box. +static llvm::Optional getArgElementType(mlir::Value val) { + mlir::Operation *defOp; + do { + defOp = val.getDefiningOp(); + // Analyze only sequences of convert operations. + if (!mlir::isa(defOp)) + return llvm::None; + val = defOp->getOperand(0); + // The convert operation is expected to convert from one + // box type to another box type. + auto boxType = val.getType().cast(); + auto elementType = fir::unwrapSeqOrBoxedSeqType(boxType); + if (!elementType.isa()) + return elementType; + } while (true); +} + void SimplifyIntrinsicsPass::runOnOperation() { LLVM_DEBUG(llvm::dbgs() << "=== Begin " DEBUG_TYPE " ===\n"); mlir::ModuleOp module = getOperation(); @@ -380,11 +420,42 @@ if (!type.isa() && !type.isa()) return; + // Try to find the element types of the boxed arguments. + auto arg1Type = getArgElementType(v1); + auto arg2Type = getArgElementType(v2); + + if (!arg1Type || !arg2Type) + return; + + // Support only floating point and integer arguments + // now (e.g. logical is skipped here). 
+ if (!arg1Type->isa() && + !arg1Type->isa()) + return; + if (!arg2Type->isa() && + !arg2Type->isa()) + return; + auto typeGenerator = [&type](fir::FirOpBuilder &builder) { return genFortranADotType(builder, type); }; + auto bodyGenerator = [&arg1Type, + &arg2Type](fir::FirOpBuilder &builder, + mlir::func::FuncOp &funcOp) { + genFortranADotBody(builder, funcOp, *arg1Type, *arg2Type); + }; + + // Suffix the function name with the element types + // of the arguments. + std::string typedFuncName(funcName); + llvm::raw_string_ostream nameOS(typedFuncName); + nameOS << "_"; + arg1Type->print(nameOS); + nameOS << "_"; + arg2Type->print(nameOS); + mlir::func::FuncOp newFunc = getOrCreateFunction( - builder, funcName, typeGenerator, genFortranADotBody); + builder, typedFuncName, typeGenerator, bodyGenerator); auto newCall = builder.create(loc, newFunc, mlir::ValueRange{v1, v2}); call->replaceAllUsesWith(newCall.getResults()); diff --git a/flang/runtime/CMakeLists.txt b/flang/runtime/CMakeLists.txt --- a/flang/runtime/CMakeLists.txt +++ b/flang/runtime/CMakeLists.txt @@ -88,4 +88,6 @@ LINK_LIBS FortranDecimal + + INSTALL_WITH_TOOLCHAIN ) diff --git a/flang/runtime/FortranMain/CMakeLists.txt b/flang/runtime/FortranMain/CMakeLists.txt --- a/flang/runtime/FortranMain/CMakeLists.txt +++ b/flang/runtime/FortranMain/CMakeLists.txt @@ -1,3 +1,3 @@ -add_flang_library(Fortran_main STATIC +add_flang_library(Fortran_main STATIC INSTALL_WITH_TOOLCHAIN Fortran_main.c ) diff --git a/flang/test/Lower/OpenACC/acc-data-operands.f90 b/flang/test/Lower/OpenACC/acc-data-operands.f90 --- a/flang/test/Lower/OpenACC/acc-data-operands.f90 +++ b/flang/test/Lower/OpenACC/acc-data-operands.f90 @@ -113,12 +113,88 @@ end subroutine -subroutine acc_operand_array_section2(a) - real, dimension(100) :: a +! 
Testing array sections on allocatable array +subroutine acc_operand_array_section_allocatable() + real, allocatable :: a(:) + + allocate(a(100)) + + !$acc data copyin(a(1:50)) copyout(a(51:100)) + !$acc end data + + !CHECK: %[[ARR_HEAP:.*]] = fir.alloca !fir.heap> {uniq_name = "_QMacc_data_operandFacc_operand_array_section_allocatableEa.addr"} + + !CHECK: %[[LOAD_ARR0:.*]] = fir.load %[[ARR_HEAP]] : !fir.ref>> + !CHECK: %[[C1_I32:.*]] = arith.constant 1 : i32 + !CHECK: %[[C1_I64:.*]] = fir.convert %[[C1_I32]] : (i32) -> i64 + !CHECK: %[[LB0:.*]] = fir.convert %[[C1_I64]] : (i64) -> index + !CHECK: %[[C1_STEP:.*]] = arith.constant 1 : i64 + !CHECK: %[[STEP0:.*]] = fir.convert %[[C1_STEP]] : (i64) -> index + !CHECK: %[[C50_I32:.*]] = arith.constant 50 : i32 + !CHECK: %[[C50_I64:.*]] = fir.convert %[[C50_I32]] : (i32) -> i64 + !CHECK: %[[UB0:.*]] = fir.convert %[[C50_I64]] : (i64) -> index + !CHECK: %[[SHAPE_SHIFT0:.*]] = fir.shape_shift %{{.*}}, %{{.*}} : (index, index) -> !fir.shapeshift<1> + !CHECK: %[[SLICE0:.*]] = fir.slice %[[LB0]], %[[UB0]], %[[STEP0]] : (index, index, index) -> !fir.slice<1> + !CHECK: %[[ARR_SECTION0:.*]] = fir.embox %[[LOAD_ARR0]](%[[SHAPE_SHIFT0]]) [%[[SLICE0]]] : (!fir.heap>, !fir.shapeshift<1>, !fir.slice<1>) -> !fir.box> + !CHECK: %[[MEM0:.*]] = fir.alloca !fir.box> + !CHECK: fir.store %[[ARR_SECTION0]] to %[[MEM0]] : !fir.ref>> + + !CHECK: %[[LOAD_ARR1:.*]] = fir.load %[[ARR_HEAP]] : !fir.ref>> + !CHECK: %[[C51_I32:.*]] = arith.constant 51 : i32 + !CHECK: %[[C51_I64:.*]] = fir.convert %[[C51_I32]] : (i32) -> i64 + !CHECK: %[[LB1:.*]] = fir.convert %[[C51_I64]] : (i64) -> index + !CHECK: %[[C1_STEP:.*]] = arith.constant 1 : i64 + !CHECK: %[[STEP1:.*]] = fir.convert %[[C1_STEP]] : (i64) -> index + !CHECK: %[[C100_I32:.*]] = arith.constant 100 : i32 + !CHECK: %[[C100_I64:.*]] = fir.convert %[[C100_I32]] : (i32) -> i64 + !CHECK: %[[UB1:.*]] = fir.convert %[[C100_I64]] : (i64) -> index + !CHECK: %[[SHAPE_SHIFT1:.*]] = fir.shape_shift %{{.*}}, 
%{{.*}} : (index, index) -> !fir.shapeshift<1> + !CHECK: %[[SLICE1:.*]] = fir.slice %[[LB1]], %[[UB1]], %[[STEP1]] : (index, index, index) -> !fir.slice<1> + !CHECK: %[[ARR_SECTION1:.*]] = fir.embox %[[LOAD_ARR1]](%[[SHAPE_SHIFT1]]) [%[[SLICE1]]] : (!fir.heap>, !fir.shapeshift<1>, !fir.slice<1>) -> !fir.box> + !CHECK: %[[MEM1:.*]] = fir.alloca !fir.box> + !CHECK: fir.store %[[ARR_SECTION1]] to %[[MEM1]] : !fir.ref>> + + !CHECK: acc.data copyin(%[[MEM0]] : !fir.ref>>) copyout(%[[MEM1]] : !fir.ref>>) + + deallocate(a) +end subroutine - !$acc data copyin(a) + +! Testing array sections on pointer array +subroutine acc_operand_array_section_pointer() + real, target :: a(100) + real, pointer :: p(:) + + p => a + + !$acc data copyin(p(1:50)) !$acc end data + !CHECK: %[[C100:.*]] = arith.constant 100 : index + !CHECK: %[[ARR:.*]] = fir.alloca !fir.array<100xf32> {bindc_name = "a", fir.target, uniq_name = "_QMacc_data_operandFacc_operand_array_section_pointerEa"} + !CHECK: %[[PTR:.*]] = fir.alloca !fir.box>> {bindc_name = "p", uniq_name = "_QMacc_data_operandFacc_operand_array_section_pointerEp"} + !CHECK: %[[SHAPE0:.*]] = fir.shape %[[C100]] : (index) -> !fir.shape<1> + !CHECK: %[[EMBOX0:.*]] = fir.embox %[[ARR]](%[[SHAPE0]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box>> + !CHECK: fir.store %[[EMBOX0]] to %[[PTR]] : !fir.ref>>> + !CHECK: %[[PTR_LOAD:.*]] = fir.load %[[PTR]] : !fir.ref>>> + !CHECK: %[[C0:.*]] = arith.constant 0 : index + !CHECK: %[[BOX_DIMS:.*]]:3 = fir.box_dims %[[PTR_LOAD]], %[[C0]] : (!fir.box>>, index) -> (index, index, index) + !CHECK: %[[C1_I32:.*]] = arith.constant 1 : i32 + !CHECK: %[[C1_I64:.*]] = fir.convert %[[C1_I32]] : (i32) -> i64 + !CHECK: %[[LB0:.*]] = fir.convert %[[C1_I64]] : (i64) -> index + !CHECK: %[[C1_STEP:.*]] = arith.constant 1 : i64 + !CHECK: %[[STEP0:.*]] = fir.convert %[[C1_STEP]] : (i64) -> index + !CHECK: %[[C50_I32:.*]] = arith.constant 50 : i32 + !CHECK: %[[C50_I64:.*]] = fir.convert %[[C50_I32]] : (i32) -> i64 + !CHECK: 
%[[UB0:.*]] = fir.convert %[[C50_I64]] : (i64) -> index + !CHECK: %[[SHIFT0:.*]] = fir.shift %[[BOX_DIMS]]#0 : (index) -> !fir.shift<1> + !CHECK: %[[SLICE0:.*]] = fir.slice %[[LB0]], %[[UB0]], %[[STEP0]] : (index, index, index) -> !fir.slice<1> + !CHECK: %[[REBOX0:.*]] = fir.rebox %7(%[[SHIFT0]]) [%[[SLICE0]]] : (!fir.box>>, !fir.shift<1>, !fir.slice<1>) -> !fir.box> + !CHECK: %[[MEM0:.*]] = fir.alloca !fir.box> + !CHECK: fir.store %[[REBOX0]] to %[[MEM0]] : !fir.ref>> + + !CHECK: acc.data copyin(%[[MEM0]] : !fir.ref>>) { + end subroutine end module diff --git a/flang/test/Transforms/simplifyintrinsics.fir b/flang/test/Transforms/simplifyintrinsics.fir --- a/flang/test/Transforms/simplifyintrinsics.fir +++ b/flang/test/Transforms/simplifyintrinsics.fir @@ -344,15 +344,15 @@ // CHECK: %[[RESLOC:.*]] = fir.alloca f32 {bindc_name = "dot", uniq_name = "_QFdotEdot"} // CHECK: %[[ACAST:.*]] = fir.convert %[[A]] : (!fir.box>) -> !fir.box // CHECK: %[[BCAST:.*]] = fir.convert %[[B]] : (!fir.box>) -> !fir.box -// CHECK: %[[RES:.*]] = fir.call @_FortranADotProductReal4_simplified(%[[ACAST]], %[[BCAST]]) : (!fir.box, !fir.box) -> f32 +// CHECK: %[[RES:.*]] = fir.call @_FortranADotProductReal4_f32_f32_simplified(%[[ACAST]], %[[BCAST]]) : (!fir.box, !fir.box) -> f32 // CHECK: fir.store %[[RES]] to %[[RESLOC]] : !fir.ref // CHECK: %[[RET:.*]] = fir.load %[[RESLOC]] : !fir.ref // CHECK: return %[[RET]] : f32 // CHECK: } -// CHECK-LABEL: func.func private @_FortranADotProductReal4_simplified( -// CHECK-SAME: %[[A:.*]]: !fir.box, -// CHECK-SAME: %[[B:.*]]: !fir.box) -> f32 attributes {llvm.linkage = #llvm.linkage} { +// CHECK-LABEL: func.func private @_FortranADotProductReal4_f32_f32_simplified( +// CHECK-SAME: %[[A:.*]]: !fir.box, +// CHECK-SAME: %[[B:.*]]: !fir.box) -> f32 attributes {llvm.linkage = #llvm.linkage} { // CHECK: %[[FZERO:.*]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[IZERO:.*]] = arith.constant 0 : index // CHECK: %[[ACAST:.*]] = fir.convert %[[A]] : 
(!fir.box) -> !fir.box> @@ -363,9 +363,11 @@ // CHECK: %[[RES:.*]] = fir.do_loop %[[IDX:.*]] = %[[IZERO]] to %[[LEN]] step %[[IONE]] iter_args(%[[SUM:.*]] = %[[FZERO]]) -> (f32) { // CHECK: %[[ALOC:.*]] = fir.coordinate_of %[[ACAST]], %[[IDX]] : (!fir.box>, index) -> !fir.ref // CHECK: %[[AVAL:.*]] = fir.load %[[ALOC]] : !fir.ref +// CHECK: %[[AVALCAST:.*]] = fir.convert %[[AVAL]] : (f32) -> f32 // CHECK: %[[BLOC:.*]] = fir.coordinate_of %[[BCAST]], %[[IDX]] : (!fir.box>, index) -> !fir.ref // CHECK: %[[BVAL:.*]] = fir.load %[[BLOC]] : !fir.ref -// CHECK: %[[MUL:.*]] = arith.mulf %[[AVAL]], %[[BVAL]] : f32 +// CHECK: %[[BVALCAST:.*]] = fir.convert %[[BVAL]] : (f32) -> f32 +// CHECK: %[[MUL:.*]] = arith.mulf %[[AVALCAST]], %[[BVALCAST]] : f32 // CHECK: %[[NEWSUM:.*]] = arith.addf %[[MUL]], %[[SUM]] : f32 // CHECK: fir.result %[[NEWSUM]] : f32 // CHECK: } @@ -479,15 +481,15 @@ // CHECK: %[[RESLOC:.*]] = fir.alloca i32 {bindc_name = "dot", uniq_name = "_QFdotEdot"} // CHECK: %[[ACAST:.*]] = fir.convert %[[A]] : (!fir.box>) -> !fir.box // CHECK: %[[BCAST:.*]] = fir.convert %[[B]] : (!fir.box>) -> !fir.box -// CHECK: %[[RES:.*]] = fir.call @_FortranADotProductInteger4_simplified(%[[ACAST]], %[[BCAST]]) : (!fir.box, !fir.box) -> i32 +// CHECK: %[[RES:.*]] = fir.call @_FortranADotProductInteger4_i32_i32_simplified(%[[ACAST]], %[[BCAST]]) : (!fir.box, !fir.box) -> i32 // CHECK: fir.store %[[RES]] to %[[RESLOC]] : !fir.ref // CHECK: %[[RET:.*]] = fir.load %[[RESLOC]] : !fir.ref // CHECK: return %[[RET]] : i32 // CHECK: } -// CHECK-LABEL: func.func private @_FortranADotProductInteger4_simplified( -// CHECK-SAME: %[[A:.*]]: !fir.box, -// CHECK-SAME: %[[B:.*]]: !fir.box) -> i32 attributes {llvm.linkage = #llvm.linkage} { +// CHECK-LABEL: func.func private @_FortranADotProductInteger4_i32_i32_simplified( +// CHECK-SAME: %[[A:.*]]: !fir.box, +// CHECK-SAME: %[[B:.*]]: !fir.box) -> i32 attributes {llvm.linkage = #llvm.linkage} { // CHECK: %[[I32ZERO:.*]] = arith.constant 0 : i32 
// CHECK: %[[IZERO:.*]] = arith.constant 0 : index // CHECK: %[[ACAST:.*]] = fir.convert %[[A]] : (!fir.box) -> !fir.box> @@ -498,9 +500,11 @@ // CHECK: %[[RES:.*]] = fir.do_loop %[[IDX:.*]] = %[[IZERO]] to %[[LEN]] step %[[IONE]] iter_args(%[[SUM:.*]] = %[[I32ZERO]]) -> (i32) { // CHECK: %[[ALOC:.*]] = fir.coordinate_of %[[ACAST]], %[[IDX]] : (!fir.box>, index) -> !fir.ref // CHECK: %[[AVAL:.*]] = fir.load %[[ALOC]] : !fir.ref +// CHECK: %[[AVALCAST:.*]] = fir.convert %[[AVAL]] : (i32) -> i32 // CHECK: %[[BLOC:.*]] = fir.coordinate_of %[[BCAST]], %[[IDX]] : (!fir.box>, index) -> !fir.ref // CHECK: %[[BVAL:.*]] = fir.load %[[BLOC]] : !fir.ref -// CHECK: %[[MUL:.*]] = arith.muli %[[AVAL]], %[[BVAL]] : i32 +// CHECK: %[[BVALCAST:.*]] = fir.convert %[[BVAL]] : (i32) -> i32 +// CHECK: %[[MUL:.*]] = arith.muli %[[AVALCAST]], %[[BVALCAST]] : i32 // CHECK: %[[NEWSUM:.*]] = arith.addi %[[MUL]], %[[SUM]] : i32 // CHECK: fir.result %[[NEWSUM]] : i32 // CHECK: } @@ -587,3 +591,63 @@ // CHECK-SAME: %[[A:.*]]: !fir.box> {fir.bindc_name = "a"}, // CHECK-SAME: %[[B:.*]]: !fir.box> {fir.bindc_name = "b"}) -> i64 { // CHECK-NOT: call{{.*}}_FortranADotProductInteger8( + +// ----- + +// Test mixed types, e.g. when _FortranADotProductReal8 is called +// with f64 and f32 arguments. The loaded elements must be converted +// to the result type REAL(8) before the computations.
+ +func.func @dot_f64_f32(%arg0: !fir.box> {fir.bindc_name = "a"}, %arg1: !fir.box> {fir.bindc_name = "b"}) -> f64 { + %0 = fir.alloca f64 {bindc_name = "dot", uniq_name = "_QFdotEdot"} + %1 = fir.address_of(@_QQcl.2E2F646F742E66393000) : !fir.ref> + %c3_i32 = arith.constant 3 : i32 + %2 = fir.convert %arg0 : (!fir.box>) -> !fir.box + %3 = fir.convert %arg1 : (!fir.box>) -> !fir.box + %4 = fir.convert %1 : (!fir.ref>) -> !fir.ref + %5 = fir.call @_FortranADotProductReal8(%2, %3, %4, %c3_i32) : (!fir.box, !fir.box, !fir.ref, i32) -> f64 + fir.store %5 to %0 : !fir.ref + %6 = fir.load %0 : !fir.ref + return %6 : f64 +} +func.func private @_FortranADotProductReal8(!fir.box, !fir.box, !fir.ref, i32) -> f64 attributes {fir.runtime} +fir.global linkonce @_QQcl.2E2F646F742E66393000 constant : !fir.char<1,10> { + %0 = fir.string_lit "./dot.f90\00"(10) : !fir.char<1,10> + fir.has_value %0 : !fir.char<1,10> +} + +// CHECK-LABEL: func.func @dot_f64_f32( +// CHECK-SAME: %[[A:.*]]: !fir.box> {fir.bindc_name = "a"}, +// CHECK-SAME: %[[B:.*]]: !fir.box> {fir.bindc_name = "b"}) -> f64 { +// CHECK: %[[RESLOC:.*]] = fir.alloca f64 {bindc_name = "dot", uniq_name = "_QFdotEdot"} +// CHECK: %[[ACAST:.*]] = fir.convert %[[A]] : (!fir.box>) -> !fir.box +// CHECK: %[[BCAST:.*]] = fir.convert %[[B]] : (!fir.box>) -> !fir.box +// CHECK: %[[RES:.*]] = fir.call @_FortranADotProductReal8_f64_f32_simplified(%[[ACAST]], %[[BCAST]]) : (!fir.box, !fir.box) -> f64 +// CHECK: fir.store %[[RES]] to %[[RESLOC]] : !fir.ref +// CHECK: %[[RET:.*]] = fir.load %[[RESLOC]] : !fir.ref +// CHECK: return %[[RET]] : f64 +// CHECK: } + +// CHECK-LABEL: func.func private @_FortranADotProductReal8_f64_f32_simplified( +// CHECK-SAME: %[[A:.*]]: !fir.box, +// CHECK-SAME: %[[B:.*]]: !fir.box) -> f64 attributes {llvm.linkage = #llvm.linkage} { +// CHECK: %[[FZERO:.*]] = arith.constant 0.000000e+00 : f64 +// CHECK: %[[IZERO:.*]] = arith.constant 0 : index +// CHECK: %[[ACAST:.*]] = fir.convert %[[A]] : (!fir.box) -> 
!fir.box> +// CHECK: %[[BCAST:.*]] = fir.convert %[[B]] : (!fir.box) -> !fir.box> +// CHECK: %[[DIMS:.*]]:3 = fir.box_dims %[[ACAST]], %[[IZERO]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[IONE:.*]] = arith.constant 1 : index +// CHECK: %[[LEN:.*]] = arith.subi %[[DIMS]]#1, %[[IONE]] : index +// CHECK: %[[RES:.*]] = fir.do_loop %[[IDX:.*]] = %[[IZERO]] to %[[LEN]] step %[[IONE]] iter_args(%[[SUM:.*]] = %[[FZERO]]) -> (f64) { +// CHECK: %[[ALOC:.*]] = fir.coordinate_of %[[ACAST]], %[[IDX]] : (!fir.box>, index) -> !fir.ref +// CHECK: %[[AVAL:.*]] = fir.load %[[ALOC]] : !fir.ref +// CHECK: %[[AVALCAST:.*]] = fir.convert %[[AVAL]] : (f64) -> f64 +// CHECK: %[[BLOC:.*]] = fir.coordinate_of %[[BCAST]], %[[IDX]] : (!fir.box>, index) -> !fir.ref +// CHECK: %[[BVAL:.*]] = fir.load %[[BLOC]] : !fir.ref +// CHECK: %[[BVALCAST:.*]] = fir.convert %[[BVAL]] : (f32) -> f64 +// CHECK: %[[MUL:.*]] = arith.mulf %[[AVALCAST]], %[[BVALCAST]] : f64 +// CHECK: %[[NEWSUM:.*]] = arith.addf %[[MUL]], %[[SUM]] : f64 +// CHECK: fir.result %[[NEWSUM]] : f64 +// CHECK: } +// CHECK: return %[[RES]] : f64 +// CHECK: } diff --git a/libc/config/linux/CMakeLists.txt b/libc/config/linux/CMakeLists.txt --- a/libc/config/linux/CMakeLists.txt +++ b/libc/config/linux/CMakeLists.txt @@ -1,6 +1,6 @@ -add_header( +add_header_library( app_h - HDR + HDRS app.h DEPENDS libc.src.__support.common diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -91,6 +91,10 @@ libc.src.stdlib.realloc libc.src.stdlib.free + # stdio.h entrypoints + libc.src.stdio.sprintf + libc.src.stdio.snprintf + # sys/stat.h entrypoints libc.src.sys.stat.mkdir libc.src.sys.stat.mkdirat @@ -242,8 +246,6 @@ libc.src.stdio.funlockfile libc.src.stdio.fwrite libc.src.stdio.fwrite_unlocked - libc.src.stdio.sprintf - libc.src.stdio.snprintf libc.src.stdio.fprintf libc.src.stdio.printf 
libc.src.stdio.stderr diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -92,6 +92,10 @@ libc.src.stdlib.aligned_alloc libc.src.stdlib.free + # stdio.h entrypoints + libc.src.stdio.sprintf + libc.src.stdio.snprintf + # sys/mman.h entrypoints libc.src.sys.mman.mmap libc.src.sys.mman.munmap @@ -298,8 +302,6 @@ libc.src.stdio.funlockfile libc.src.stdio.fwrite libc.src.stdio.fwrite_unlocked - libc.src.stdio.sprintf - libc.src.stdio.snprintf libc.src.stdio.fprintf libc.src.stdio.printf libc.src.stdio.stderr diff --git a/libc/include/llvm-libc-types/CMakeLists.txt b/libc/include/llvm-libc-types/CMakeLists.txt --- a/libc/include/llvm-libc-types/CMakeLists.txt +++ b/libc/include/llvm-libc-types/CMakeLists.txt @@ -1,3 +1,5 @@ +add_header(off64_t HDR off64_t.h) +add_header(size_t HDR size_t.h) add_header(__bsearchcompare_t HDR __bsearchcompare_t.h) add_header(__call_once_func_t HDR __call_once_func_t.h) add_header(__futex_word HDR __futex_word.h) @@ -23,14 +25,12 @@ add_header(mode_t HDR mode_t.h) add_header(mtx_t HDR mtx_t.h DEPENDS .__futex_word .__mutex_type) add_header(off_t HDR off_t.h) -add_header(off64_t HDR off64_t.h) add_header(once_flag HDR once_flag.h DEPENDS .__futex_word) add_header(pthread_attr_t HDR pthread_attr_t.h DEPENDS .size_t) add_header(pthread_key_t HDR pthread_key_t.h) add_header(pthread_mutex_t HDR pthread_mutex_t.h DEPENDS .__futex_word .__mutex_type) add_header(pthread_t HDR pthread_t.h DEPENDS .__thread_type) add_header(pthread_mutexattr_t HDR pthread_mutexattr_t.h) -add_header(size_t HDR size_t.h) add_header(ssize_t HDR ssize_t.h) add_header(struct_dirent HDR struct_dirent.h DEPENDS .ino_t .off_t) add_header(struct_sigaction HDR struct_sigaction.h) diff --git a/libc/src/CMakeLists.txt b/libc/src/CMakeLists.txt --- a/libc/src/CMakeLists.txt +++ b/libc/src/CMakeLists.txt @@ -7,6 +7,7 @@ 
add_subdirectory(math) add_subdirectory(string) add_subdirectory(stdlib) +add_subdirectory(stdio) if(${LIBC_TARGET_OS} STREQUAL "linux") add_subdirectory(dirent) @@ -24,6 +25,5 @@ # since assert uses the signal API, we disable assert also. # add_subdirectory(assert) # add_subdirectory(signal) -add_subdirectory(stdio) add_subdirectory(threads) add_subdirectory(time) diff --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt --- a/libc/src/stdio/printf_core/CMakeLists.txt +++ b/libc/src/stdio/printf_core/CMakeLists.txt @@ -31,17 +31,6 @@ .core_structs ) -add_object_library( - file_writer - SRCS - file_writer.cpp - HDRS - file_writer.h - DEPENDS - libc.src.__support.File.file - .core_structs -) - add_object_library( writer SRCS @@ -91,6 +80,23 @@ libc.src.__support.arg_list ) +if(NOT (TARGET libc.src.__support.File.file)) + # Not all platforms have a file implementation. If file is unvailable, + # then we must skip all file based printf sections. + return() +endif() + +add_object_library( + file_writer + SRCS + file_writer.cpp + HDRS + file_writer.h + DEPENDS + libc.src.__support.File.file + .core_structs +) + add_object_library( vfprintf_internal SRCS diff --git a/libc/test/src/CMakeLists.txt b/libc/test/src/CMakeLists.txt --- a/libc/test/src/CMakeLists.txt +++ b/libc/test/src/CMakeLists.txt @@ -34,6 +34,7 @@ add_subdirectory(math) add_subdirectory(string) add_subdirectory(stdlib) +add_subdirectory(stdio) if(${LIBC_TARGET_OS} STREQUAL "linux") add_subdirectory(fcntl) @@ -50,7 +51,6 @@ # since assert uses the signal API, we disable assert also. 
# add_subdirectory(assert) # add_subdirectory(signal) -add_subdirectory(stdio) add_subdirectory(time) if(${LIBC_TARGET_OS} STREQUAL "linux") diff --git a/libc/test/src/stdio/printf_core/parser_test.cpp b/libc/test/src/stdio/printf_core/parser_test.cpp --- a/libc/test/src/stdio/printf_core/parser_test.cpp +++ b/libc/test/src/stdio/printf_core/parser_test.cpp @@ -191,7 +191,7 @@ TEST(LlvmLibcPrintfParserTest, EvalOneArgWithLongLengthModifier) { __llvm_libc::printf_core::FormatSection format_arr[10]; const char *str = "%lld"; - int arg1 = 12345; + long long arg1 = 12345; evaluate(format_arr, str, arg1); __llvm_libc::printf_core::FormatSection expected; @@ -208,7 +208,7 @@ TEST(LlvmLibcPrintfParserTest, EvalOneArgWithAllOptions) { __llvm_libc::printf_core::FormatSection format_arr[10]; const char *str = "% -056.78jd"; - int arg1 = 12345; + intmax_t arg1 = 12345; evaluate(format_arr, str, arg1); __llvm_libc::printf_core::FormatSection expected; diff --git a/libc/utils/UnitTest/CMakeLists.txt b/libc/utils/UnitTest/CMakeLists.txt --- a/libc/utils/UnitTest/CMakeLists.txt +++ b/libc/utils/UnitTest/CMakeLists.txt @@ -54,13 +54,6 @@ libc.src.__support.CPP.array_ref ) -if(NOT LLVM_LIBC_FULL_BUILD) # TODO(michaelrj): make a more permanant solution. - return() -endif() - -#currently stdio is fullbuild only, so this matcher that depends on a piece of -#printf also has to be fullbuild only. 
- add_library( LibcPrintfHelpers PrintfMatcher.h diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt --- a/libcxx/CMakeLists.txt +++ b/libcxx/CMakeLists.txt @@ -592,6 +592,7 @@ -Wno-user-defined-literals -Wno-covered-switch-default -Wno-suggest-override + -Wno-ctad-maybe-unsupported ) if (LIBCXX_TARGETING_CLANG_CL) target_add_compile_flags_if_supported(${target} PRIVATE diff --git a/libcxx/docs/Status/SpaceshipProjects.csv b/libcxx/docs/Status/SpaceshipProjects.csv --- a/libcxx/docs/Status/SpaceshipProjects.csv +++ b/libcxx/docs/Status/SpaceshipProjects.csv @@ -15,9 +15,9 @@ | `[type.info] `_,| `typeinfo `_,None,Adrian Vogelsgesang,|Complete| | `[coroutine.handle.compare] `_,| `coroutine_handle `_,[comparisons.three.way],Chuanqi Xu,|Complete| | `[pairs.spec] `_,| `pair `_,[expos.only.func],Kent Ross,|Complete| -| `[syserr.errcat.nonvirtuals] `_,| `error_category `_,[comparisons.three.way],Adrian Vogelsgesang,|In Progress| +| `[syserr.errcat.nonvirtuals] `_,| `error_category `_,[comparisons.three.way],Adrian Vogelsgesang,|Complete| | `[syserr.compare] `_,"| `error_code `_ -| `error_condition `_",None,Adrian Vogelsgesang,|In Progress| +| `error_condition `_",None,Adrian Vogelsgesang,|Complete| | `[tuple.rel] `_,| `tuple `_,[expos.only.func],Kent Ross,|Complete| "| `[optional.relops] `_ | `[optional.nullops] `_ @@ -29,31 +29,31 @@ | `[unique.ptr.special] `_,| `unique_ptr `_,[comparisons.three.way],Adrian Vogelsgesang,|Complete| | `[util.smartptr.shared.cmp] `_,| `shared_ptr `_,[comparisons.three.way],Adrian Vogelsgesang,|Complete| | `[type.index.members] `_,| `type_index `_,None,Adrian Vogelsgesang,|Complete| -| `[charconv.syn] `_,| to_chars_result,None,Mark de Wever,|Complete| -| `[charconv.syn] `_,| from_chars_result,None,Mark de Wever,|Complete| +| `[charconv.syn] `_,| `to_chars_result `_,None,Mark de Wever,|Complete| +| `[charconv.syn] `_,| `from_chars_result `_,None,Mark de Wever,|Complete| | `[stacktrace.entry.cmp] `_,| 
stacktrace_entry,None,Unassigned,|Not Started| | `[stacktrace.basic.cmp] `_,| basic_stacktrace,[alg.three.way],Unassigned,|Not Started| -| `[string.cmp] `_,| `basic_string `,None,Mark de Wever,|Complete| +| `[string.cmp] `_,| `basic_string `_,None,Mark de Wever,|Complete| | `[string.view.comparison] `_,| `basic_string_view `_,None,Mark de Wever,|Complete| -| `[array.syn] `_ (`general `_),| array,[expos.only.func],Unassigned,|Not Started| -| `[deque.syn] `_ (`general `_),| deque,[expos.only.func],Unassigned,|Not Started| -| `[forward.list.syn] `_ (`general `_),| forward_list,[expos.only.func],Unassigned,|Not Started| -| `[list.syn] `_ (`general `_),| list,[expos.only.func],Unassigned,|Not Started| -| `[vector.syn] `_ (`general `_),| vector,[expos.only.func],Unassigned,|Not Started| -| `[associative.map.syn] `_ (`general `_),"| map +| `[array.syn] `_ (`general `_),| array,[expos.only.func],Unassigned,|Not Started| +| `[deque.syn] `_ (`general `_),| deque,[expos.only.func],Unassigned,|Not Started| +| `[forward.list.syn] `_ (`general `_),| forward_list,[expos.only.func],Unassigned,|Not Started| +| `[list.syn] `_ (`general `_),| list,[expos.only.func],Unassigned,|Not Started| +| `[vector.syn] `_ (`general `_),| vector,[expos.only.func],Unassigned,|Not Started| +| `[associative.map.syn] `_ (`general `_),"| map | multimap",[expos.only.func],Unassigned,|Not Started| -| `[associative.set.syn] `_ (`general `_),"| multiset +| `[associative.set.syn] `_ (`general `_),"| multiset | set",[expos.only.func],Unassigned,|Not Started| | `[queue.ops] `_,| queue,None,Unassigned,|Not Started| | `[stack.ops] `_,| stack,None,Unassigned,|Not Started| -| `[reverse.iter.cmp] `_,| reverse_iterator,None,Mikhail Maltsev,|Complete| +| `[reverse.iter.cmp] `_,| `reverse_iterator `_,None,Mikhail Maltsev,|Complete| | `[move.iter.op.comp] `_,| move_iterator,None,Unassigned,|Not Started| | `[counted.iter.cmp] `_,| counted_iterator,None,Unassigned,|Not Started| | `[range.iota.iterator] `_,| 
`ranges::iota_view::iterator `_,[concepts.cmp],Arthur O'Dwyer,|Complete| | `[range.transform.iterator] `_,| `ranges::transform_view::iterator `_,[concepts.cmp],Arthur O'Dwyer,|Complete| | `[range.elements.iterator] `_,| ranges::elements_view::iterator,[concepts.cmp],Unassigned,|Not Started| | `[time.duration.comparisons] `_, "chrono::duration", None, Mark de Wever, |Not Started| -| `[time.point.comparisons] `_, "chrono::point", None, Mark de Wever, |Not Started| +| `[time.point.comparisons] `_, "chrono::time_point", None, Mark de Wever, |Not Started| "| `[time.cal.day.nonmembers] `_ | `[time.cal.month.nonmembers] `_ | `[time.cal.year.nonmembers] `_ @@ -61,14 +61,14 @@ | `[time.cal.mdlast] `_ | `[time.cal.ym.nonmembers] `_ | `[time.cal.ymd.nonmembers] `_ -| `[time.cal.ymdlast.nonmembers] `_","| chrono::day -| chrono::month -| chrono::year -| chrono::month_day -| chrono::month_day_last -| chrono::year_month -| chrono::year_month_day -| chrono::year_month_day_last",None,Mark de Wever,|Complete| +| `[time.cal.ymdlast.nonmembers] `_","| `chrono::day `_ +| `chrono::month `_ +| `chrono::year `_ +| `chrono::month_day `_ +| `chrono::month_day_last `_ +| `chrono::year_month `_ +| `chrono::year_month_day `_ +| `chrono::year_month_day_last `_",None,Mark de Wever,|Complete| "| `[time.zone.nonmembers] `_ | `[time.zone.leap.nonmembers] `_ | `[time.zone.link.nonmembers] `_","| chrono::time_zone diff --git a/libcxx/include/__format/buffer.h b/libcxx/include/__format/buffer.h --- a/libcxx/include/__format/buffer.h +++ b/libcxx/include/__format/buffer.h @@ -11,8 +11,10 @@ #define _LIBCPP___FORMAT_BUFFER_H #include <__algorithm/copy_n.h> +#include <__algorithm/fill_n.h> #include <__algorithm/max.h> #include <__algorithm/min.h> +#include <__algorithm/transform.h> #include <__algorithm/unwrap_iter.h> #include <__config> #include <__format/enable_insertable.h> @@ -26,6 +28,7 @@ #include <__utility/move.h> #include #include +#include #include #if 
!defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -69,8 +72,6 @@ return back_insert_iterator{*this}; } - // TODO FMT It would be nice to have an overload taking a - // basic_string_view<_CharT> and append it directly. _LIBCPP_HIDE_FROM_ABI void push_back(_CharT __c) { __ptr_[__size_++] = __c; @@ -80,6 +81,95 @@ flush(); } + /// Copies the input __str to the buffer. + /// + /// Since some of the input is generated by std::to_chars, there needs to be a + /// conversion when _CharT is wchar_t. + template <__formatter::__char_type _InCharT> + _LIBCPP_HIDE_FROM_ABI void __copy(basic_string_view<_InCharT> __str) { + // When the underlying iterator is a simple iterator the __capacity_ is + // infinite. For a string or container back_inserter it isn't. This means + // adding a large string the the buffer can cause some overhead. In that + // case a better approach could be: + // - flush the buffer + // - container.append(__str.begin(), __str.end()); + // The same holds true for the fill. + // For transform it might be slightly harder, however the use case for + // transform is slightly less common; it converts hexadecimal values to + // upper case. For integral these strings are short. + // TODO FMT Look at the improvements above. + size_t __n = __str.size(); + + __flush_on_overflow(__n); + if (__n <= __capacity_) { + _VSTD::copy_n(__str.data(), __n, _VSTD::addressof(__ptr_[__size_])); + __size_ += __n; + return; + } + + // The output doesn't fit in the internal buffer. + // Copy the data in "__capacity_" sized chunks. + _LIBCPP_ASSERT(__size_ == 0, "the buffer should be flushed by __flush_on_overflow"); + const _InCharT* __first = __str.data(); + do { + size_t __chunk = _VSTD::min(__n, __capacity_); + _VSTD::copy_n(__first, __chunk, _VSTD::addressof(__ptr_[__size_])); + __size_ = __chunk; + __first += __chunk; + __n -= __chunk; + flush(); + } while (__n); + } + + /// A std::transform wrapper. + /// + /// Like @ref __copy it may need to do type conversion. 
+ template <__formatter::__char_type _InCharT, class _UnaryOperation> + _LIBCPP_HIDE_FROM_ABI void __transform(const _InCharT* __first, const _InCharT* __last, _UnaryOperation __operation) { + _LIBCPP_ASSERT(__first <= __last, "not a valid range"); + + size_t __n = static_cast(__last - __first); + __flush_on_overflow(__n); + if (__n <= __capacity_) { + _VSTD::transform(__first, __last, _VSTD::addressof(__ptr_[__size_]), _VSTD::move(__operation)); + __size_ += __n; + return; + } + + // The output doesn't fit in the internal buffer. + // Transform the data in "__capacity_" sized chunks. + _LIBCPP_ASSERT(__size_ == 0, "the buffer should be flushed by __flush_on_overflow"); + do { + size_t __chunk = _VSTD::min(__n, __capacity_); + _VSTD::transform(__first, __first + __chunk, _VSTD::addressof(__ptr_[__size_]), __operation); + __size_ = __chunk; + __first += __chunk; + __n -= __chunk; + flush(); + } while (__n); + } + + /// A \c fill_n wrapper. + _LIBCPP_HIDE_FROM_ABI void __fill(size_t __n, _CharT __value) { + __flush_on_overflow(__n); + if (__n <= __capacity_) { + _VSTD::fill_n(_VSTD::addressof(__ptr_[__size_]), __n, __value); + __size_ += __n; + return; + } + + // The output doesn't fit in the internal buffer. + // Fill the buffer in "__capacity_" sized chunks. + _LIBCPP_ASSERT(__size_ == 0, "the buffer should be flushed by __flush_on_overflow"); + do { + size_t __chunk = _VSTD::min(__n, __capacity_); + _VSTD::fill_n(_VSTD::addressof(__ptr_[__size_]), __chunk, __value); + __size_ = __chunk; + __n -= __chunk; + flush(); + } while (__n); + } + _LIBCPP_HIDE_FROM_ABI void flush() { __flush_(__ptr_, __size_, __obj_); __size_ = 0; @@ -91,6 +181,44 @@ size_t __size_{0}; void (*__flush_)(_CharT*, size_t, void*); void* __obj_; + + /// Flushes the buffer when the output operation would overflow the buffer. + /// + /// A simple approach for the overflow detection would be something along the + /// lines: + /// \code + /// // The internal buffer is large enough. 
+ /// if (__n <= __capacity_) { + /// // Flush when we really would overflow. + /// if (__size_ + __n >= __capacity_) + /// flush(); + /// ... + /// } + /// \endcode + /// + /// This approach works for all cases but one: + /// A __format_to_n_buffer_base where \ref __enable_direct_output is true. + /// In that case the \ref __capacity_ of the buffer changes during the first + /// \ref flush. During that operation the output buffer switches from its + /// __writer_ to its __storage_. The \ref __capacity_ of the former depends + /// on the value of n, of the latter is a fixed size. For example: + /// - a format_to_n call with a 10'000 char buffer, + /// - the buffer is filled with 9'500 chars, + /// - adding 1'000 elements would overflow the buffer so the buffer gets + /// changed and the \ref __capacity_ decreases from 10'000 to + /// __buffer_size (256 at the time of writing). + /// + /// This means that the \ref flush for this class may need to copy a part of + /// the internal buffer to the proper output. In this example there will be + /// 500 characters that need this copy operation. + /// + /// Note it would be more efficient to write 500 chars directly and then swap + /// the buffers. This would make the code more complex and \ref format_to_n is + /// not the most common use case. Therefore the optimization isn't done. + _LIBCPP_HIDE_FROM_ABI void __flush_on_overflow(size_t __n) { + if (__size_ + __n >= __capacity_) + flush(); + } }; /// A storage using an internal buffer. 
@@ -280,12 +408,12 @@ using _Size = iter_difference_t<_OutIt>; public: - _LIBCPP_HIDE_FROM_ABI explicit __format_to_n_buffer_base(_OutIt __out_it, _Size __n) - : __writer_(_VSTD::move(__out_it)), __n_(_VSTD::max(_Size(0), __n)) {} + _LIBCPP_HIDE_FROM_ABI explicit __format_to_n_buffer_base(_OutIt __out_it, _Size __max_size) + : __writer_(_VSTD::move(__out_it)), __max_size_(_VSTD::max(_Size(0), __max_size)) {} _LIBCPP_HIDE_FROM_ABI void flush(_CharT* __ptr, size_t __size) { - if (_Size(__size_) <= __n_) - __writer_.flush(__ptr, _VSTD::min(_Size(__size), __n_ - __size_)); + if (_Size(__size_) <= __max_size_) + __writer_.flush(__ptr, _VSTD::min(_Size(__size), __max_size_ - __size_)); __size_ += __size; } @@ -294,7 +422,7 @@ __output_buffer<_CharT> __output_{__storage_.begin(), __storage_.__buffer_size, this}; typename __writer_selector<_OutIt, _CharT>::type __writer_; - _Size __n_; + _Size __max_size_; _Size __size_{0}; }; @@ -310,24 +438,35 @@ using _Size = iter_difference_t<_OutIt>; public: - _LIBCPP_HIDE_FROM_ABI explicit __format_to_n_buffer_base(_OutIt __out_it, _Size __n) - : __output_(_VSTD::__unwrap_iter(__out_it), __n, this), __writer_(_VSTD::move(__out_it)) { - if (__n <= 0) [[unlikely]] + _LIBCPP_HIDE_FROM_ABI explicit __format_to_n_buffer_base(_OutIt __out_it, _Size __max_size) + : __output_(_VSTD::__unwrap_iter(__out_it), __max_size, this), + __writer_(_VSTD::move(__out_it)), + __max_size_(__max_size) { + if (__max_size <= 0) [[unlikely]] __output_.reset(__storage_.begin(), __storage_.__buffer_size); } _LIBCPP_HIDE_FROM_ABI void flush(_CharT* __ptr, size_t __size) { - // A flush to the direct writer happens in two occasions: + // A flush to the direct writer happens in the following occasions: // - The format function has written the maximum number of allowed code // units. At this point it's no longer valid to write to this writer. So // switch to the internal storage. 
This internal storage doesn't need to // be written anywhere so the flush for that storage writes no output. + // - Like above, but the next "mass write" operation would overflow the + // buffer. In that case the buffer is pre-emptively switched. The still + // valid code units will be written separately. // - The format_to_n function is finished. In this case there's no need to // switch the buffer, but for simplicity the buffers are still switched. - // When the __n <= 0 the constructor already switched the buffers. + // When the __max_size <= 0 the constructor already switched the buffers. if (__size_ == 0 && __ptr != __storage_.begin()) { __writer_.flush(__ptr, __size); __output_.reset(__storage_.begin(), __storage_.__buffer_size); + } else if (__size_ < __max_size_) { + // Copies a part of the internal buffer to the output up to n characters. + // See __output_buffer<_CharT>::__flush_on_overflow for more information. + _Size __s = _VSTD::min(_Size(__size), __max_size_ - __size_); + std::copy_n(__ptr, __s, __writer_.out()); + __writer_.flush(__ptr, __s); } __size_ += __size; @@ -338,6 +477,7 @@ __output_buffer<_CharT> __output_; __writer_direct<_OutIt, _CharT> __writer_; + _Size __max_size_; _Size __size_{0}; }; @@ -350,7 +490,8 @@ using _Size = iter_difference_t<_OutIt>; public: - _LIBCPP_HIDE_FROM_ABI explicit __format_to_n_buffer(_OutIt __out_it, _Size __n) : _Base(_VSTD::move(__out_it), __n) {} + _LIBCPP_HIDE_FROM_ABI explicit __format_to_n_buffer(_OutIt __out_it, _Size __max_size) + : _Base(_VSTD::move(__out_it), __max_size) {} _LIBCPP_HIDE_FROM_ABI auto make_output_iterator() { return this->__output_.make_output_iterator(); } _LIBCPP_HIDE_FROM_ABI format_to_n_result<_OutIt> result() && { diff --git a/libcxx/include/__format/formatter_floating_point.h b/libcxx/include/__format/formatter_floating_point.h --- a/libcxx/include/__format/formatter_floating_point.h +++ b/libcxx/include/__format/formatter_floating_point.h @@ -10,9 +10,7 @@ #ifndef 
_LIBCPP___FORMAT_FORMATTER_FLOATING_POINT_H #define _LIBCPP___FORMAT_FORMATTER_FLOATING_POINT_H -#include <__algorithm/copy.h> #include <__algorithm/copy_n.h> -#include <__algorithm/fill_n.h> #include <__algorithm/find.h> #include <__algorithm/min.h> #include <__algorithm/rotate.h> @@ -528,13 +526,13 @@ // sign and (zero padding or alignment) if (__zero_padding && __first != __buffer.begin()) *__out_it++ = *__buffer.begin(); - __out_it = _VSTD::fill_n(_VSTD::move(__out_it), __padding.__before_, __specs.__fill_); + __out_it = __formatter::__fill(_VSTD::move(__out_it), __padding.__before_, __specs.__fill_); if (!__zero_padding && __first != __buffer.begin()) *__out_it++ = *__buffer.begin(); // integral part if (__grouping.empty()) { - __out_it = _VSTD::copy_n(__first, __digits, _VSTD::move(__out_it)); + __out_it = __formatter::__copy(__first, __digits, _VSTD::move(__out_it)); } else { auto __r = __grouping.rbegin(); auto __e = __grouping.rend() - 1; @@ -546,7 +544,7 @@ // This loop achieves that process by testing the termination condition // midway in the loop. 
while (true) { - __out_it = _VSTD::copy_n(__first, *__r, _VSTD::move(__out_it)); + __out_it = __formatter::__copy(__first, *__r, _VSTD::move(__out_it)); __first += *__r; if (__r == __e) @@ -560,16 +558,16 @@ // fractional part if (__result.__radix_point != __result.__last) { *__out_it++ = __np.decimal_point(); - __out_it = _VSTD::copy(__result.__radix_point + 1, __result.__exponent, _VSTD::move(__out_it)); - __out_it = _VSTD::fill_n(_VSTD::move(__out_it), __buffer.__num_trailing_zeros(), _CharT('0')); + __out_it = __formatter::__copy(__result.__radix_point + 1, __result.__exponent, _VSTD::move(__out_it)); + __out_it = __formatter::__fill(_VSTD::move(__out_it), __buffer.__num_trailing_zeros(), _CharT('0')); } // exponent if (__result.__exponent != __result.__last) - __out_it = _VSTD::copy(__result.__exponent, __result.__last, _VSTD::move(__out_it)); + __out_it = __formatter::__copy(__result.__exponent, __result.__last, _VSTD::move(__out_it)); // alignment - return _VSTD::fill_n(_VSTD::move(__out_it), __padding.__after_, __specs.__fill_); + return __formatter::__fill(_VSTD::move(__out_it), __padding.__after_, __specs.__fill_); } # endif // _LIBCPP_HAS_NO_LOCALIZATION @@ -651,14 +649,15 @@ if (__size + __num_trailing_zeros >= __specs.__width_) { if (__num_trailing_zeros && __result.__exponent != __result.__last) // Insert trailing zeros before exponent character. 
- return _VSTD::copy( + return __formatter::__copy( __result.__exponent, __result.__last, - _VSTD::fill_n( - _VSTD::copy(__buffer.begin(), __result.__exponent, __ctx.out()), __num_trailing_zeros, _CharT('0'))); + __formatter::__fill(__formatter::__copy(__buffer.begin(), __result.__exponent, __ctx.out()), + __num_trailing_zeros, + _CharT('0'))); - return _VSTD::fill_n( - _VSTD::copy(__buffer.begin(), __result.__last, __ctx.out()), __num_trailing_zeros, _CharT('0')); + return __formatter::__fill( + __formatter::__copy(__buffer.begin(), __result.__last, __ctx.out()), __num_trailing_zeros, _CharT('0')); } auto __out_it = __ctx.out(); diff --git a/libcxx/include/__format/formatter_integral.h b/libcxx/include/__format/formatter_integral.h --- a/libcxx/include/__format/formatter_integral.h +++ b/libcxx/include/__format/formatter_integral.h @@ -243,7 +243,7 @@ // The zero padding is done like: // - Write [sign][prefix] // - Write data right aligned with '0' as fill character. - __out_it = _VSTD::copy(__begin, __first, _VSTD::move(__out_it)); + __out_it = __formatter::__copy(__begin, __first, _VSTD::move(__out_it)); __specs.__alignment_ = __format_spec::__alignment::__right; __specs.__fill_ = _CharT('0'); int32_t __size = __first - __begin; diff --git a/libcxx/include/__format/formatter_output.h b/libcxx/include/__format/formatter_output.h --- a/libcxx/include/__format/formatter_output.h +++ b/libcxx/include/__format/formatter_output.h @@ -14,10 +14,13 @@ #include <__algorithm/copy_n.h> #include <__algorithm/fill_n.h> #include <__algorithm/transform.h> +#include <__concepts/same_as.h> #include <__config> +#include <__format/buffer.h> #include <__format/formatter.h> #include <__format/parser_std_format_spec.h> #include <__format/unicode.h> +#include <__iterator/back_insert_iterator.h> #include <__utility/move.h> #include <__utility/unreachable.h> #include @@ -86,6 +89,63 @@ __libcpp_unreachable(); } +/// Copy wrapper. 
+/// +/// This uses a "mass output function" of __format::__output_buffer when possible. +template <__formatter::__char_type _CharT, __formatter::__char_type _OutCharT = _CharT> +_LIBCPP_HIDE_FROM_ABI auto __copy(basic_string_view<_CharT> __str, output_iterator<const _OutCharT&> auto __out_it) + -> decltype(__out_it) { + if constexpr (_VSTD::same_as<decltype(__out_it), _VSTD::back_insert_iterator<__format::__output_buffer<_OutCharT>>>) { + __out_it.__get_container()->__copy(__str); + return __out_it; + } else { + return std::copy_n(__str.data(), __str.size(), _VSTD::move(__out_it)); + } +} + +template <__formatter::__char_type _CharT, __formatter::__char_type _OutCharT = _CharT> +_LIBCPP_HIDE_FROM_ABI auto +__copy(const _CharT* __first, const _CharT* __last, output_iterator<const _OutCharT&> auto __out_it) + -> decltype(__out_it) { + return __formatter::__copy(basic_string_view{__first, __last}, _VSTD::move(__out_it)); +} + +template <__formatter::__char_type _CharT, __formatter::__char_type _OutCharT = _CharT> +_LIBCPP_HIDE_FROM_ABI auto __copy(const _CharT* __first, size_t __n, output_iterator<const _OutCharT&> auto __out_it) + -> decltype(__out_it) { + return __formatter::__copy(basic_string_view{__first, __n}, _VSTD::move(__out_it)); +} + +/// Transform wrapper. +/// +/// This uses a "mass output function" of __format::__output_buffer when possible. +template <__formatter::__char_type _CharT, __formatter::__char_type _OutCharT = _CharT, class _UnaryOperation> +_LIBCPP_HIDE_FROM_ABI auto +__transform(const _CharT* __first, + const _CharT* __last, + output_iterator<const _OutCharT&> auto __out_it, + _UnaryOperation __operation) -> decltype(__out_it) { + if constexpr (_VSTD::same_as<decltype(__out_it), _VSTD::back_insert_iterator<__format::__output_buffer<_OutCharT>>>) { + __out_it.__get_container()->__transform(__first, __last, _VSTD::move(__operation)); + return __out_it; + } else { + return std::transform(__first, __last, _VSTD::move(__out_it), __operation); + } +} + +/// Fill wrapper. +/// +/// This uses a "mass output function" of __format::__output_buffer when possible. 
+template <__formatter::__char_type _CharT, output_iterator<const _CharT&> _OutIt> +_LIBCPP_HIDE_FROM_ABI _OutIt __fill(_OutIt __out_it, size_t __n, _CharT __value) { + if constexpr (_VSTD::same_as<_OutIt, _VSTD::back_insert_iterator<__format::__output_buffer<_CharT>>>) { + __out_it.__get_container()->__fill(__n, __value); + return __out_it; + } else { + return std::fill_n(_VSTD::move(__out_it), __n, __value); + } +} + template <class _OutIt, class _CharT> _LIBCPP_HIDE_FROM_ABI _OutIt __write_using_decimal_separators(_OutIt __out_it, const char* __begin, const char* __first, const char* __last, string&& __grouping, _CharT __sep, @@ -97,22 +157,22 @@ __padding_size_result __padding = {0, 0}; if (__specs.__alignment_ == __format_spec::__alignment::__zero_padding) { // Write [sign][prefix]. - __out_it = _VSTD::copy(__begin, __first, _VSTD::move(__out_it)); + __out_it = __formatter::__copy(__begin, __first, _VSTD::move(__out_it)); if (__specs.__width_ > __size) { // Write zero padding. __padding.__before_ = __specs.__width_ - __size; - __out_it = _VSTD::fill_n(_VSTD::move(__out_it), __specs.__width_ - __size, _CharT('0')); + __out_it = __formatter::__fill(_VSTD::move(__out_it), __specs.__width_ - __size, _CharT('0')); } } else { if (__specs.__width_ > __size) { // Determine padding and write padding. __padding = __padding_size(__size, __specs.__width_, __specs.__alignment_); - __out_it = _VSTD::fill_n(_VSTD::move(__out_it), __padding.__before_, __specs.__fill_); + __out_it = __formatter::__fill(_VSTD::move(__out_it), __padding.__before_, __specs.__fill_); } // Write [sign][prefix]. 
- __out_it = _VSTD::copy(__begin, __first, _VSTD::move(__out_it)); + __out_it = __formatter::__copy(__begin, __first, _VSTD::move(__out_it)); } auto __r = __grouping.rbegin(); @@ -133,10 +193,10 @@ while (true) { if (__specs.__std_.__type_ == __format_spec::__type::__hexadecimal_upper_case) { __last = __first + *__r; - __out_it = _VSTD::transform(__first, __last, _VSTD::move(__out_it), __hex_to_upper); + __out_it = __formatter::__transform(__first, __last, _VSTD::move(__out_it), __hex_to_upper); __first = __last; } else { - __out_it = _VSTD::copy_n(__first, *__r, _VSTD::move(__out_it)); + __out_it = __formatter::__copy(__first, *__r, _VSTD::move(__out_it)); __first += *__r; } @@ -147,7 +207,7 @@ *__out_it++ = __sep; } - return _VSTD::fill_n(_VSTD::move(__out_it), __padding.__after_, __specs.__fill_); + return __formatter::__fill(_VSTD::move(__out_it), __padding.__after_, __specs.__fill_); } /// Writes the input to the output with the required padding. @@ -155,12 +215,10 @@ /// Since the output column width is specified the function can be used for /// ASCII and Unicode output. /// -/// \pre [\a __first, \a __last) is a valid range. /// \pre \a __size <= \a __width. Using this function when this pre-condition /// doesn't hold incurs an unwanted overhead. /// -/// \param __first Pointer to the first element to write. -/// \param __last Pointer beyond the last element to write. +/// \param __str The string to write. /// \param __out_it The output iterator to write to. /// \param __specs The parsed formatting specifications. /// \param __size The (estimated) output column width. When the elements @@ -174,31 +232,42 @@ /// conversion, which means the [\a __first, \a __last) always contains elements /// of the type \c char. 
template -_LIBCPP_HIDE_FROM_ABI auto __write( - const _CharT* __first, - const _CharT* __last, - output_iterator auto __out_it, - __format_spec::__parsed_specifications<_ParserCharT> __specs, - ptrdiff_t __size) -> decltype(__out_it) { - _LIBCPP_ASSERT(__first <= __last, "Not a valid range"); - +_LIBCPP_HIDE_FROM_ABI auto +__write(basic_string_view<_CharT> __str, + output_iterator auto __out_it, + __format_spec::__parsed_specifications<_ParserCharT> __specs, + ptrdiff_t __size) -> decltype(__out_it) { if (__size >= __specs.__width_) - return _VSTD::copy(__first, __last, _VSTD::move(__out_it)); + return __formatter::__copy(__str, _VSTD::move(__out_it)); __padding_size_result __padding = __formatter::__padding_size(__size, __specs.__width_, __specs.__std_.__alignment_); - __out_it = _VSTD::fill_n(_VSTD::move(__out_it), __padding.__before_, __specs.__fill_); - __out_it = _VSTD::copy(__first, __last, _VSTD::move(__out_it)); - return _VSTD::fill_n(_VSTD::move(__out_it), __padding.__after_, __specs.__fill_); + __out_it = __formatter::__fill(_VSTD::move(__out_it), __padding.__before_, __specs.__fill_); + __out_it = __formatter::__copy(__str, _VSTD::move(__out_it)); + return __formatter::__fill(_VSTD::move(__out_it), __padding.__after_, __specs.__fill_); +} + +template +_LIBCPP_HIDE_FROM_ABI auto +__write(const _CharT* __first, + const _CharT* __last, + output_iterator auto __out_it, + __format_spec::__parsed_specifications<_ParserCharT> __specs, + ptrdiff_t __size) -> decltype(__out_it) { + _LIBCPP_ASSERT(__first <= __last, "Not a valid range"); + return __formatter::__write(basic_string_view{__first, __last}, _VSTD::move(__out_it), __specs, __size); } /// \overload /// /// Calls the function above where \a __size = \a __last - \a __first. 
template -_LIBCPP_HIDE_FROM_ABI auto __write(const _CharT* __first, const _CharT* __last, - output_iterator auto __out_it, - __format_spec::__parsed_specifications<_ParserCharT> __specs) -> decltype(__out_it) { - return __write(__first, __last, _VSTD::move(__out_it), __specs, __last - __first); +_LIBCPP_HIDE_FROM_ABI auto +__write(const _CharT* __first, + const _CharT* __last, + output_iterator auto __out_it, + __format_spec::__parsed_specifications<_ParserCharT> __specs) -> decltype(__out_it) { + _LIBCPP_ASSERT(__first <= __last, "Not a valid range"); + return __formatter::__write(__first, __last, _VSTD::move(__out_it), __specs, __last - __first); } template @@ -210,12 +279,12 @@ ptrdiff_t __size = __last - __first; if (__size >= __specs.__width_) - return _VSTD::transform(__first, __last, _VSTD::move(__out_it), __op); + return __formatter::__transform(__first, __last, _VSTD::move(__out_it), __op); __padding_size_result __padding = __padding_size(__size, __specs.__width_, __specs.__alignment_); - __out_it = _VSTD::fill_n(_VSTD::move(__out_it), __padding.__before_, __specs.__fill_); - __out_it = _VSTD::transform(__first, __last, _VSTD::move(__out_it), __op); - return _VSTD::fill_n(_VSTD::move(__out_it), __padding.__after_, __specs.__fill_); + __out_it = __formatter::__fill(_VSTD::move(__out_it), __padding.__before_, __specs.__fill_); + __out_it = __formatter::__transform(__first, __last, _VSTD::move(__out_it), __op); + return __formatter::__fill(_VSTD::move(__out_it), __padding.__after_, __specs.__fill_); } /// Writes additional zero's for the precision before the exponent. 
@@ -240,11 +309,11 @@ __padding_size_result __padding = __padding_size(__size + __num_trailing_zeros, __specs.__width_, __specs.__alignment_); - __out_it = _VSTD::fill_n(_VSTD::move(__out_it), __padding.__before_, __specs.__fill_); - __out_it = _VSTD::copy(__first, __exponent, _VSTD::move(__out_it)); - __out_it = _VSTD::fill_n(_VSTD::move(__out_it), __num_trailing_zeros, _CharT('0')); - __out_it = _VSTD::copy(__exponent, __last, _VSTD::move(__out_it)); - return _VSTD::fill_n(_VSTD::move(__out_it), __padding.__after_, __specs.__fill_); + __out_it = __formatter::__fill(_VSTD::move(__out_it), __padding.__before_, __specs.__fill_); + __out_it = __formatter::__copy(__first, __exponent, _VSTD::move(__out_it)); + __out_it = __formatter::__fill(_VSTD::move(__out_it), __num_trailing_zeros, _CharT('0')); + __out_it = __formatter::__copy(__exponent, __last, _VSTD::move(__out_it)); + return __formatter::__fill(_VSTD::move(__out_it), __padding.__after_, __specs.__fill_); } /// Writes a string using format's width estimation algorithm. @@ -262,7 +331,7 @@ // No padding -> copy the string if (!__specs.__has_width()) - return _VSTD::copy(__str.begin(), __str.end(), _VSTD::move(__out_it)); + return __formatter::__copy(__str, _VSTD::move(__out_it)); // Note when the estimated width is larger than size there's no padding. 
So // there's no reason to get the real size when the estimate is larger than or @@ -270,8 +339,7 @@ size_t __size = __format_spec::__estimate_column_width(__str, __specs.__width_, __format_spec::__column_width_rounding::__up) .__width_; - - return __formatter::__write(__str.begin(), __str.end(), _VSTD::move(__out_it), __specs, __size); + return __formatter::__write(__str, _VSTD::move(__out_it), __specs, __size); } template diff --git a/libcxx/include/system_error b/libcxx/include/system_error --- a/libcxx/include/system_error +++ b/libcxx/include/system_error @@ -32,8 +32,9 @@ virtual string message(int ev) const = 0; bool operator==(const error_category& rhs) const noexcept; - bool operator!=(const error_category& rhs) const noexcept; - bool operator<(const error_category& rhs) const noexcept; + bool operator!=(const error_category& rhs) const noexcept; // removed in C++20 + bool operator<(const error_category& rhs) const noexcept; // removed in C++20 + strong_ordering operator<=>(const error_category& rhs) const noexcept; // C++20 }; const error_category& generic_category() noexcept; @@ -75,7 +76,6 @@ }; // non-member functions: -bool operator<(const error_code& lhs, const error_code& rhs) noexcept; template basic_ostream& operator<<(basic_ostream& os, const error_code& ec); @@ -102,8 +102,6 @@ explicit operator bool() const noexcept; }; -bool operator<(const error_condition& lhs, const error_condition& rhs) noexcept; - class system_error : public runtime_error { @@ -128,12 +126,16 @@ // Comparison operators: bool operator==(const error_code& lhs, const error_code& rhs) noexcept; bool operator==(const error_code& lhs, const error_condition& rhs) noexcept; -bool operator==(const error_condition& lhs, const error_code& rhs) noexcept; +bool operator==(const error_condition& lhs, const error_code& rhs) noexcept; // removed in C++20 bool operator==(const error_condition& lhs, const error_condition& rhs) noexcept; -bool operator!=(const error_code& lhs, const 
error_code& rhs) noexcept; -bool operator!=(const error_code& lhs, const error_condition& rhs) noexcept; -bool operator!=(const error_condition& lhs, const error_code& rhs) noexcept; -bool operator!=(const error_condition& lhs, const error_condition& rhs) noexcept; +bool operator!=(const error_code& lhs, const error_code& rhs) noexcept; // removed in C++20 +bool operator!=(const error_code& lhs, const error_condition& rhs) noexcept; // removed in C++20 +bool operator!=(const error_condition& lhs, const error_code& rhs) noexcept; // removed in C++20 +bool operator!=(const error_condition& lhs, const error_condition& rhs) noexcept; // removed in C++20 +bool operator<(const error_condition& lhs, const error_condition& rhs) noexcept; // removed in C++20 +bool operator<(const error_code& lhs, const error_code& rhs) noexcept; // removed in C++20 +strong_ordering operator<=>(const error_code& lhs, const error_code& rhs) noexcept; // C++20 +strong_ordering operator<=>(const error_condition& lhs, const error_condition& rhs) noexcept; // C++20 template <> struct hash; template <> struct hash; @@ -147,6 +149,7 @@ #include <__errc> #include <__functional/hash.h> #include <__functional/unary_function.h> +#include <__memory/addressof.h> #include #include #include @@ -223,12 +226,21 @@ _LIBCPP_INLINE_VISIBILITY bool operator==(const error_category& __rhs) const _NOEXCEPT {return this == &__rhs;} +#if _LIBCPP_STD_VER > 17 + + _LIBCPP_HIDE_FROM_ABI + strong_ordering operator<=>(const error_category& __rhs) const noexcept {return compare_three_way()(this, std::addressof(__rhs));} + +#else // _LIBCPP_STD_VER > 17 + _LIBCPP_INLINE_VISIBILITY bool operator!=(const error_category& __rhs) const _NOEXCEPT {return !(*this == __rhs);} _LIBCPP_INLINE_VISIBILITY bool operator< (const error_category& __rhs) const _NOEXCEPT {return this < &__rhs;} +#endif // _LIBCPP_STD_VER > 17 + friend class _LIBCPP_HIDDEN __do_message; }; @@ -303,14 +315,6 @@ return error_condition(static_cast(__e), 
generic_category()); } -inline _LIBCPP_INLINE_VISIBILITY -bool -operator<(const error_condition& __x, const error_condition& __y) _NOEXCEPT -{ - return __x.category() < __y.category() - || (__x.category() == __y.category() && __x.value() < __y.value()); -} - // error_code class _LIBCPP_TYPE_VIS error_code @@ -379,14 +383,6 @@ return error_code(static_cast(__e), generic_category()); } -inline _LIBCPP_INLINE_VISIBILITY -bool -operator<(const error_code& __x, const error_code& __y) _NOEXCEPT -{ - return __x.category() < __y.category() - || (__x.category() == __y.category() && __x.value() < __y.value()); -} - inline _LIBCPP_INLINE_VISIBILITY bool operator==(const error_code& __x, const error_code& __y) _NOEXCEPT @@ -402,12 +398,14 @@ || __y.category().equivalent(__x, __y.value()); } +#if _LIBCPP_STD_VER <= 17 inline _LIBCPP_INLINE_VISIBILITY bool operator==(const error_condition& __x, const error_code& __y) _NOEXCEPT { return __y == __x; } +#endif inline _LIBCPP_INLINE_VISIBILITY bool @@ -416,6 +414,8 @@ return __x.category() == __y.category() && __x.value() == __y.value(); } +#if _LIBCPP_STD_VER <= 17 + inline _LIBCPP_INLINE_VISIBILITY bool operator!=(const error_code& __x, const error_code& __y) _NOEXCEPT @@ -436,6 +436,42 @@ operator!=(const error_condition& __x, const error_condition& __y) _NOEXCEPT {return !(__x == __y);} +inline _LIBCPP_INLINE_VISIBILITY +bool +operator<(const error_condition& __x, const error_condition& __y) _NOEXCEPT +{ + return __x.category() < __y.category() + || (__x.category() == __y.category() && __x.value() < __y.value()); +} + +inline _LIBCPP_INLINE_VISIBILITY +bool +operator<(const error_code& __x, const error_code& __y) _NOEXCEPT +{ + return __x.category() < __y.category() + || (__x.category() == __y.category() && __x.value() < __y.value()); +} + +#else // _LIBCPP_STD_VER <= 17 + +inline _LIBCPP_HIDE_FROM_ABI strong_ordering +operator<=>(const error_code& __x, const error_code& __y) noexcept +{ + if (auto __c = __x.category() <=> 
__y.category(); __c != 0) + return __c; + return __x.value() <=> __y.value(); +} + +inline _LIBCPP_HIDE_FROM_ABI strong_ordering +operator<=>(const error_condition& __x, const error_condition& __y) noexcept +{ + if (auto __c = __x.category() <=> __y.category(); __c != 0) + return __c; + return __x.value() <=> __y.value(); +} + +#endif // _LIBCPP_STD_VER <= 17 + template <> struct _LIBCPP_TEMPLATE_VIS hash : public __unary_function diff --git a/libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp b/libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp --- a/libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp +++ b/libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// test libc++'s implementation of align_val_t, and the relevant new/delete +// test libc++'s implementation of align_val_t, and the relevant new/delete && !hwasan // overloads in all dialects when -faligned-allocation is present. // The dylibs shipped before macosx10.13 do not contain the aligned allocation @@ -18,7 +18,7 @@ // nor does the dynamic library shipped with z/OS. // UNSUPPORTED: target={{.+}}-zos{{.*}} -// XFAIL: sanitizer-new-delete, ubsan +// XFAIL: sanitizer-new-delete && !hwasan, ubsan // GCC doesn't support the aligned-allocation flags. // XFAIL: gcc diff --git a/libcxx/test/std/diagnostics/syserr/syserr.compare/cmp_error_code.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.compare/cmp_error_code.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/diagnostics/syserr/syserr.compare/cmp_error_code.pass.cpp @@ -0,0 +1,41 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// UNSUPPORTED: c++03, c++11, c++14, c++17 + +// + +// class error_code + +// strong_ordering operator<=>(const error_code& lhs, const error_code& rhs) noexcept + +#include +#include + +#include "test_macros.h" +#include "test_comparisons.h" + +int main(int, char**) { + AssertOrderAreNoexcept(); + AssertOrderReturn(); + + // Same error category + std::error_code ec1a = std::error_code(1, std::generic_category()); + std::error_code ec1b = std::error_code(1, std::generic_category()); + std::error_code ec2 = std::error_code(2, std::generic_category()); + + assert(testOrder(ec1a, ec1b, std::strong_ordering::equal)); + assert(testOrder(ec1a, ec2, std::strong_ordering::less)); + + // Different error category + const std::error_code& ec3 = std::error_code(2, std::system_category()); + + bool isLess = ec2 < ec3; + assert(testOrder(ec2, ec3, isLess ? std::strong_ordering::less : std::strong_ordering::greater)); + + return 0; +} diff --git a/libcxx/test/std/diagnostics/syserr/syserr.compare/cmp_error_condition.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.compare/cmp_error_condition.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/diagnostics/syserr/syserr.compare/cmp_error_condition.pass.cpp @@ -0,0 +1,41 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// UNSUPPORTED: c++03, c++11, c++14, c++17 + +// + +// class error_condition + +// strong_ordering operator<=>(const error_condition& lhs, const error_condition& rhs) noexcept + +#include +#include + +#include "test_macros.h" +#include "test_comparisons.h" + +int main(int, char**) { + AssertOrderAreNoexcept(); + AssertOrderReturn(); + + // Same error category + std::error_condition ec1a = std::error_condition(1, std::generic_category()); + std::error_condition ec1b = std::error_condition(1, std::generic_category()); + std::error_condition ec2 = std::error_condition(2, std::generic_category()); + + assert(testOrder(ec1a, ec1b, std::strong_ordering::equal)); + assert(testOrder(ec1a, ec2, std::strong_ordering::less)); + + // Different error category + const std::error_condition& ec3 = std::error_condition(2, std::system_category()); + + bool isLess = ec2 < ec3; + assert(testOrder(ec2, ec3, isLess ? std::strong_ordering::less : std::strong_ordering::greater)); + + return 0; +} diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.nonvirtuals/cmp.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.nonvirtuals/cmp.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.nonvirtuals/cmp.pass.cpp @@ -0,0 +1,36 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// UNSUPPORTED: c++03, c++11, c++14, c++17 + +// + +// class error_category + +// strong_ordering operator<=>(const error_category& rhs) const noexcept; + +#include +#include + +#include "test_macros.h" +#include "test_comparisons.h" + +int main(int, char**) { + AssertOrderAreNoexcept(); + AssertOrderReturn(); + + const std::error_category& e_cat1 = std::generic_category(); + const std::error_category& e_cat2 = std::generic_category(); + const std::error_category& e_cat3 = std::system_category(); + + assert(testOrder(e_cat1, e_cat2, std::strong_ordering::equal)); + + bool isLess = e_cat1 < e_cat3; + assert(testOrder(e_cat1, e_cat3, isLess ? std::strong_ordering::less : std::strong_ordering::greater)); + + return 0; +} diff --git a/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.unsigned_integral.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.unsigned_integral.pass.cpp --- a/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.unsigned_integral.pass.cpp +++ b/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.unsigned_integral.pass.cpp @@ -88,6 +88,8 @@ test_termination_condition( STR("340282366920938463463374607431768211455"), STR("}"), A(std::numeric_limits<__uint128_t>::max())); #endif + // Test __formatter::__transform (libc++ specific). 
+ test_termination_condition(STR("FF"), STR("X}"), A(255)); } template diff --git a/libcxx/test/std/utilities/format/format.functions/format_tests.h b/libcxx/test/std/utilities/format/format.functions/format_tests.h --- a/libcxx/test/std/utilities/format/format.functions/format_tests.h +++ b/libcxx/test/std/utilities/format/format.functions/format_tests.h @@ -2557,6 +2557,68 @@ format_test_pointer(check, check_exception); } +/// Tests special buffer functions with a "large" input. +/// +/// This is a test specific for libc++, however the code should behave the same +/// on all implementations. +/// In \c __format::__output_buffer there are some special functions to optimize +/// outputting multiple characters, \c __copy, \c __transform, \c __fill. This +/// test validates whether the functions behave properly when the output size +/// doesn't fit in its internal buffer. +template +void format_test_buffer_optimizations(TestFunction check) { +#ifdef _LIBCPP_VERSION + // Used to validate our test sets are the proper size. + // To test the chunked operations it needs to be larger than the internal + // buffer. Picked a nice looking number. + constexpr int minimum = 3 * std::__format::__internal_storage::__buffer_size; +#else + constexpr int minimum = 1; +#endif + + // Copy + std::basic_string str = STR( + "The quick brown fox jumps over the lazy dog." + "The quick brown fox jumps over the lazy dog." + "The quick brown fox jumps over the lazy dog." + "The quick brown fox jumps over the lazy dog." + "The quick brown fox jumps over the lazy dog." + "The quick brown fox jumps over the lazy dog." + "The quick brown fox jumps over the lazy dog." + "The quick brown fox jumps over the lazy dog." + "The quick brown fox jumps over the lazy dog." + "The quick brown fox jumps over the lazy dog." + "The quick brown fox jumps over the lazy dog." + "The quick brown fox jumps over the lazy dog." + "The quick brown fox jumps over the lazy dog." 
+ "The quick brown fox jumps over the lazy dog." + "The quick brown fox jumps over the lazy dog." + "The quick brown fox jumps over the lazy dog." + "The quick brown fox jumps over the lazy dog." + "The quick brown fox jumps over the lazy dog." + "The quick brown fox jumps over the lazy dog." + "The quick brown fox jumps over the lazy dog." + "The quick brown fox jumps over the lazy dog." + "The quick brown fox jumps over the lazy dog." + "The quick brown fox jumps over the lazy dog." + "The quick brown fox jumps over the lazy dog." + "The quick brown fox jumps over the lazy dog." + "The quick brown fox jumps over the lazy dog." + "The quick brown fox jumps over the lazy dog." + "The quick brown fox jumps over the lazy dog." + "The quick brown fox jumps over the lazy dog." + "The quick brown fox jumps over the lazy dog."); + assert(str.size() > minimum); + check.template operator()<"{}">(std::basic_string_view{str}, str); + + // Fill + std::basic_string fill(minimum, CharT('*')); + check.template operator()<"{:*<{}}">(std::basic_string_view{str + fill}, str, str.size() + minimum); + check.template operator()<"{:*^{}}">( + std::basic_string_view{fill + str + fill}, str, minimum + str.size() + minimum); + check.template operator()<"{:*>{}}">(std::basic_string_view{fill + str}, str, minimum + str.size()); +} + template void format_tests(TestFunction check, ExceptionTest check_exception) { // *** Test escaping *** @@ -2671,6 +2733,9 @@ // *** Test handle formatter argument *** format_test_handle(check, check_exception); + + // *** Test the interal buffer optimizations *** + format_test_buffer_optimizations(check); } #ifndef TEST_HAS_NO_WIDE_CHARACTERS diff --git a/libcxx/test/support/MoveOnly.h b/libcxx/test/support/MoveOnly.h --- a/libcxx/test/support/MoveOnly.h +++ b/libcxx/test/support/MoveOnly.h @@ -62,7 +62,7 @@ { typedef MoveOnly argument_type; typedef size_t result_type; - TEST_CONSTEXPR size_t operator()(const MoveOnly& x) const {return x.get();} + 
TEST_CONSTEXPR size_t operator()(const MoveOnly& x) const {return static_cast(x.get());} }; #endif // MOVEONLY_H diff --git a/libcxx/test/support/test_macros.h b/libcxx/test/support/test_macros.h --- a/libcxx/test/support/test_macros.h +++ b/libcxx/test/support/test_macros.h @@ -190,8 +190,8 @@ #define TEST_HAS_NO_EXCEPTIONS #endif -#if TEST_HAS_FEATURE(address_sanitizer) || TEST_HAS_FEATURE(memory_sanitizer) || \ - TEST_HAS_FEATURE(thread_sanitizer) +#if TEST_HAS_FEATURE(address_sanitizer) || TEST_HAS_FEATURE(hwaddress_sanitizer) || \ + TEST_HAS_FEATURE(memory_sanitizer) || TEST_HAS_FEATURE(thread_sanitizer) #define TEST_HAS_SANITIZERS #endif diff --git a/libcxx/utils/CMakeLists.txt b/libcxx/utils/CMakeLists.txt --- a/libcxx/utils/CMakeLists.txt +++ b/libcxx/utils/CMakeLists.txt @@ -11,8 +11,24 @@ COMMAND "${Python3_EXECUTABLE}" "${LIBCXX_SOURCE_DIR}/utils/generate_feature_test_macro_components.py" COMMENT "Generate the header and tests for feature test macros.") +add_custom_target(libcxx-generate-extended-grapheme-cluster-tables + COMMAND + "${Python3_EXECUTABLE}" + "${LIBCXX_SOURCE_DIR}/utils/generate_extended_grapheme_cluster_table.py" + "${LIBCXX_SOURCE_DIR}/include/__format/extended_grapheme_cluster_table.h" + COMMENT "Generate the extended grapheme cluster header.") + +add_custom_target(libcxx-generate-extended-grapheme-cluster-tests + COMMAND + "${Python3_EXECUTABLE}" + "${LIBCXX_SOURCE_DIR}/utils/generate_extended_grapheme_cluster_test.py" + "${LIBCXX_SOURCE_DIR}/test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.h" + COMMENT "Generate the extended grapheme cluster header.") + add_custom_target(libcxx-generate-files DEPENDS libcxx-generate-public-header-transitive-inclusion-tests libcxx-generate-public-header-tests libcxx-generate-feature-test-macros + libcxx-generate-extended-grapheme-cluster-tables + libcxx-generate-extended-grapheme-cluster-tests COMMENT "Create all the auto-generated files in libc++ and its tests.") 
diff --git a/libcxx/utils/data/unicode/GraphemeBreakProperty.txt b/libcxx/utils/data/unicode/GraphemeBreakProperty.txt new file mode 100644 --- /dev/null +++ b/libcxx/utils/data/unicode/GraphemeBreakProperty.txt @@ -0,0 +1,1459 @@ +# GraphemeBreakProperty-14.0.0.txt +# Date: 2021-08-12, 23:13:02 GMT +# © 2021 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see http://www.unicode.org/reports/tr44/ + +# ================================================ + +# Property: Grapheme_Cluster_Break + +# All code points not explicitly listed for Grapheme_Cluster_Break +# have the value Other (XX). + +# @missing: 0000..10FFFF; Other + +# ================================================ + +0600..0605 ; Prepend # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE +06DD ; Prepend # Cf ARABIC END OF AYAH +070F ; Prepend # Cf SYRIAC ABBREVIATION MARK +0890..0891 ; Prepend # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE +08E2 ; Prepend # Cf ARABIC DISPUTED END OF AYAH +0D4E ; Prepend # Lo MALAYALAM LETTER DOT REPH +110BD ; Prepend # Cf KAITHI NUMBER SIGN +110CD ; Prepend # Cf KAITHI NUMBER SIGN ABOVE +111C2..111C3 ; Prepend # Lo [2] SHARADA SIGN JIHVAMULIYA..SHARADA SIGN UPADHMANIYA +1193F ; Prepend # Lo DIVES AKURU PREFIXED NASAL SIGN +11941 ; Prepend # Lo DIVES AKURU INITIAL RA +11A3A ; Prepend # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA +11A84..11A89 ; Prepend # Lo [6] SOYOMBO SIGN JIHVAMULIYA..SOYOMBO CLUSTER-INITIAL LETTER SA +11D46 ; Prepend # Lo MASARAM GONDI REPHA + +# Total code points: 26 + +# ================================================ + +000D ; CR # Cc + +# Total code points: 1 + +# ================================================ + +000A ; LF # Cc + +# Total code points: 1 + +# ================================================ + +0000..0009 ; Control # Cc 
[10] .. +000B..000C ; Control # Cc [2] .. +000E..001F ; Control # Cc [18] .. +007F..009F ; Control # Cc [33] .. +00AD ; Control # Cf SOFT HYPHEN +061C ; Control # Cf ARABIC LETTER MARK +180E ; Control # Cf MONGOLIAN VOWEL SEPARATOR +200B ; Control # Cf ZERO WIDTH SPACE +200E..200F ; Control # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK +2028 ; Control # Zl LINE SEPARATOR +2029 ; Control # Zp PARAGRAPH SEPARATOR +202A..202E ; Control # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE +2060..2064 ; Control # Cf [5] WORD JOINER..INVISIBLE PLUS +2065 ; Control # Cn +2066..206F ; Control # Cf [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES +FEFF ; Control # Cf ZERO WIDTH NO-BREAK SPACE +FFF0..FFF8 ; Control # Cn [9] .. +FFF9..FFFB ; Control # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR +13430..13438 ; Control # Cf [9] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END SEGMENT +1BCA0..1BCA3 ; Control # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP +1D173..1D17A ; Control # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE +E0000 ; Control # Cn +E0001 ; Control # Cf LANGUAGE TAG +E0002..E001F ; Control # Cn [30] .. +E0080..E00FF ; Control # Cn [128] .. +E01F0..E0FFF ; Control # Cn [3600] .. 
+ +# Total code points: 3886 + +# ================================================ + +0300..036F ; Extend # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X +0483..0487 ; Extend # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE +0488..0489 ; Extend # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN +0591..05BD ; Extend # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG +05BF ; Extend # Mn HEBREW POINT RAFE +05C1..05C2 ; Extend # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C4..05C5 ; Extend # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT +05C7 ; Extend # Mn HEBREW POINT QAMATS QATAN +0610..061A ; Extend # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA +064B..065F ; Extend # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW +0670 ; Extend # Mn ARABIC LETTER SUPERSCRIPT ALEF +06D6..06DC ; Extend # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN +06DF..06E4 ; Extend # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA +06E7..06E8 ; Extend # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON +06EA..06ED ; Extend # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM +0711 ; Extend # Mn SYRIAC LETTER SUPERSCRIPT ALAPH +0730..074A ; Extend # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH +07A6..07B0 ; Extend # Mn [11] THAANA ABAFILI..THAANA SUKUN +07EB..07F3 ; Extend # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE +07FD ; Extend # Mn NKO DANTAYALAN +0816..0819 ; Extend # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH +081B..0823 ; Extend # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A +0825..0827 ; Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U +0829..082D ; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA +0859..085B ; Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +0898..089F ; Extend # Mn [8] ARABIC SMALL HIGH WORD 
AL-JUZ..ARABIC HALF MADDA OVER MADDA +08CA..08E1 ; Extend # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA +08E3..0902 ; Extend # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA +093A ; Extend # Mn DEVANAGARI VOWEL SIGN OE +093C ; Extend # Mn DEVANAGARI SIGN NUKTA +0941..0948 ; Extend # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI +094D ; Extend # Mn DEVANAGARI SIGN VIRAMA +0951..0957 ; Extend # Mn [7] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN UUE +0962..0963 ; Extend # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL +0981 ; Extend # Mn BENGALI SIGN CANDRABINDU +09BC ; Extend # Mn BENGALI SIGN NUKTA +09BE ; Extend # Mc BENGALI VOWEL SIGN AA +09C1..09C4 ; Extend # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR +09CD ; Extend # Mn BENGALI SIGN VIRAMA +09D7 ; Extend # Mc BENGALI AU LENGTH MARK +09E2..09E3 ; Extend # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL +09FE ; Extend # Mn BENGALI SANDHI MARK +0A01..0A02 ; Extend # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI +0A3C ; Extend # Mn GURMUKHI SIGN NUKTA +0A41..0A42 ; Extend # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU +0A47..0A48 ; Extend # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI +0A4B..0A4D ; Extend # Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA +0A51 ; Extend # Mn GURMUKHI SIGN UDAAT +0A70..0A71 ; Extend # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK +0A75 ; Extend # Mn GURMUKHI SIGN YAKASH +0A81..0A82 ; Extend # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA +0ABC ; Extend # Mn GUJARATI SIGN NUKTA +0AC1..0AC5 ; Extend # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E +0AC7..0AC8 ; Extend # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI +0ACD ; Extend # Mn GUJARATI SIGN VIRAMA +0AE2..0AE3 ; Extend # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL +0AFA..0AFF ; Extend # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE 
NUKTA ABOVE +0B01 ; Extend # Mn ORIYA SIGN CANDRABINDU +0B3C ; Extend # Mn ORIYA SIGN NUKTA +0B3E ; Extend # Mc ORIYA VOWEL SIGN AA +0B3F ; Extend # Mn ORIYA VOWEL SIGN I +0B41..0B44 ; Extend # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR +0B4D ; Extend # Mn ORIYA SIGN VIRAMA +0B55..0B56 ; Extend # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B57 ; Extend # Mc ORIYA AU LENGTH MARK +0B62..0B63 ; Extend # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL +0B82 ; Extend # Mn TAMIL SIGN ANUSVARA +0BBE ; Extend # Mc TAMIL VOWEL SIGN AA +0BC0 ; Extend # Mn TAMIL VOWEL SIGN II +0BCD ; Extend # Mn TAMIL SIGN VIRAMA +0BD7 ; Extend # Mc TAMIL AU LENGTH MARK +0C00 ; Extend # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE +0C04 ; Extend # Mn TELUGU SIGN COMBINING ANUSVARA ABOVE +0C3C ; Extend # Mn TELUGU SIGN NUKTA +0C3E..0C40 ; Extend # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II +0C46..0C48 ; Extend # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI +0C4A..0C4D ; Extend # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA +0C55..0C56 ; Extend # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK +0C62..0C63 ; Extend # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL +0C81 ; Extend # Mn KANNADA SIGN CANDRABINDU +0CBC ; Extend # Mn KANNADA SIGN NUKTA +0CBF ; Extend # Mn KANNADA VOWEL SIGN I +0CC2 ; Extend # Mc KANNADA VOWEL SIGN UU +0CC6 ; Extend # Mn KANNADA VOWEL SIGN E +0CCC..0CCD ; Extend # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA +0CD5..0CD6 ; Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK +0CE2..0CE3 ; Extend # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL +0D00..0D01 ; Extend # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU +0D3B..0D3C ; Extend # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA +0D3E ; Extend # Mc MALAYALAM VOWEL SIGN AA +0D41..0D44 ; Extend # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR +0D4D ; Extend 
# Mn MALAYALAM SIGN VIRAMA +0D57 ; Extend # Mc MALAYALAM AU LENGTH MARK +0D62..0D63 ; Extend # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL +0D81 ; Extend # Mn SINHALA SIGN CANDRABINDU +0DCA ; Extend # Mn SINHALA SIGN AL-LAKUNA +0DCF ; Extend # Mc SINHALA VOWEL SIGN AELA-PILLA +0DD2..0DD4 ; Extend # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA +0DD6 ; Extend # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA +0DDF ; Extend # Mc SINHALA VOWEL SIGN GAYANUKITTA +0E31 ; Extend # Mn THAI CHARACTER MAI HAN-AKAT +0E34..0E3A ; Extend # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU +0E47..0E4E ; Extend # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN +0EB1 ; Extend # Mn LAO VOWEL SIGN MAI KAN +0EB4..0EBC ; Extend # Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO +0EC8..0ECD ; Extend # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA +0F18..0F19 ; Extend # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS +0F35 ; Extend # Mn TIBETAN MARK NGAS BZUNG NYI ZLA +0F37 ; Extend # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS +0F39 ; Extend # Mn TIBETAN MARK TSA -PHRU +0F71..0F7E ; Extend # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO +0F80..0F84 ; Extend # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA +0F86..0F87 ; Extend # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS +0F8D..0F97 ; Extend # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA +0F99..0FBC ; Extend # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA +0FC6 ; Extend # Mn TIBETAN SYMBOL PADMA GDAN +102D..1030 ; Extend # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU +1032..1037 ; Extend # Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW +1039..103A ; Extend # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT +103D..103E ; Extend # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA +1058..1059 ; Extend # Mn [2] MYANMAR VOWEL SIGN VOCALIC 
L..MYANMAR VOWEL SIGN VOCALIC LL +105E..1060 ; Extend # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA +1071..1074 ; Extend # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE +1082 ; Extend # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA +1085..1086 ; Extend # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y +108D ; Extend # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE +109D ; Extend # Mn MYANMAR VOWEL SIGN AITON AI +135D..135F ; Extend # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK +1712..1714 ; Extend # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA +1732..1733 ; Extend # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U +1752..1753 ; Extend # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U +1772..1773 ; Extend # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +17B4..17B5 ; Extend # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B7..17BD ; Extend # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA +17C6 ; Extend # Mn KHMER SIGN NIKAHIT +17C9..17D3 ; Extend # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT +17DD ; Extend # Mn KHMER SIGN ATTHACAN +180B..180D ; Extend # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +180F ; Extend # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR +1885..1886 ; Extend # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +18A9 ; Extend # Mn MONGOLIAN LETTER ALI GALI DAGALGA +1920..1922 ; Extend # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U +1927..1928 ; Extend # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O +1932 ; Extend # Mn LIMBU SMALL LETTER ANUSVARA +1939..193B ; Extend # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I +1A17..1A18 ; Extend # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U +1A1B ; Extend # Mn BUGINESE VOWEL SIGN AE +1A56 ; Extend # Mn TAI THAM CONSONANT SIGN MEDIAL LA +1A58..1A5E ; Extend # Mn [7] TAI THAM SIGN 
MAI KANG LAI..TAI THAM CONSONANT SIGN SA +1A60 ; Extend # Mn TAI THAM SIGN SAKOT +1A62 ; Extend # Mn TAI THAM VOWEL SIGN MAI SAT +1A65..1A6C ; Extend # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW +1A73..1A7C ; Extend # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN +1A7F ; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT +1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW +1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY +1ABF..1ACE ; Extend # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG +1B34 ; Extend # Mn BALINESE SIGN REREKAN +1B35 ; Extend # Mc BALINESE VOWEL SIGN TEDUNG +1B36..1B3A ; Extend # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA +1B3C ; Extend # Mn BALINESE VOWEL SIGN LA LENGA +1B42 ; Extend # Mn BALINESE VOWEL SIGN PEPET +1B6B..1B73 ; Extend # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG +1B80..1B81 ; Extend # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR +1BA2..1BA5 ; Extend # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU +1BA8..1BA9 ; Extend # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAB..1BAD ; Extend # Mn [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA +1BE6 ; Extend # Mn BATAK SIGN TOMPI +1BE8..1BE9 ; Extend # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE +1BED ; Extend # Mn BATAK VOWEL SIGN KARO O +1BEF..1BF1 ; Extend # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H +1C2C..1C33 ; Extend # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T +1C36..1C37 ; Extend # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA +1CD0..1CD2 ; Extend # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA +1CD4..1CE0 ; Extend # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA 
+1CE2..1CE8 ; Extend # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL +1CED ; Extend # Mn VEDIC SIGN TIRYAK +1CF4 ; Extend # Mn VEDIC TONE CANDRA ABOVE +1CF8..1CF9 ; Extend # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE +1DC0..1DFF ; Extend # Mn [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW +200C ; Extend # Cf ZERO WIDTH NON-JOINER +20D0..20DC ; Extend # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE +20DD..20E0 ; Extend # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH +20E1 ; Extend # Mn COMBINING LEFT RIGHT ARROW ABOVE +20E2..20E4 ; Extend # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE +20E5..20F0 ; Extend # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE +2CEF..2CF1 ; Extend # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2D7F ; Extend # Mn TIFINAGH CONSONANT JOINER +2DE0..2DFF ; Extend # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS +302A..302D ; Extend # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; Extend # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK +3099..309A ; Extend # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +A66F ; Extend # Mn COMBINING CYRILLIC VZMET +A670..A672 ; Extend # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN +A674..A67D ; Extend # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A69E..A69F ; Extend # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E +A6F0..A6F1 ; Extend # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS +A802 ; Extend # Mn SYLOTI NAGRI SIGN DVISVARA +A806 ; Extend # Mn SYLOTI NAGRI SIGN HASANTA +A80B ; Extend # Mn SYLOTI NAGRI SIGN ANUSVARA +A825..A826 ; Extend # Mn [2] 
SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E +A82C ; Extend # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA +A8C4..A8C5 ; Extend # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU +A8E0..A8F1 ; Extend # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA +A8FF ; Extend # Mn DEVANAGARI VOWEL SIGN AY +A926..A92D ; Extend # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU +A947..A951 ; Extend # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R +A980..A982 ; Extend # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR +A9B3 ; Extend # Mn JAVANESE SIGN CECAK TELU +A9B6..A9B9 ; Extend # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT +A9BC..A9BD ; Extend # Mn [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET +A9E5 ; Extend # Mn MYANMAR SIGN SHAN SAW +AA29..AA2E ; Extend # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE +AA31..AA32 ; Extend # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE +AA35..AA36 ; Extend # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA +AA43 ; Extend # Mn CHAM CONSONANT SIGN FINAL NG +AA4C ; Extend # Mn CHAM CONSONANT SIGN FINAL M +AA7C ; Extend # Mn MYANMAR SIGN TAI LAING TONE-2 +AAB0 ; Extend # Mn TAI VIET MAI KANG +AAB2..AAB4 ; Extend # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U +AAB7..AAB8 ; Extend # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA +AABE..AABF ; Extend # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK +AAC1 ; Extend # Mn TAI VIET TONE MAI THO +AAEC..AAED ; Extend # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAF6 ; Extend # Mn MEETEI MAYEK VIRAMA +ABE5 ; Extend # Mn MEETEI MAYEK VOWEL SIGN ANAP +ABE8 ; Extend # Mn MEETEI MAYEK VOWEL SIGN UNAP +ABED ; Extend # Mn MEETEI MAYEK APUN IYEK +FB1E ; Extend # Mn HEBREW POINT JUDEO-SPANISH VARIKA +FE00..FE0F ; Extend # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 +FE20..FE2F ; Extend # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF +FF9E..FF9F ; Extend # Lm [2] 
HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +101FD ; Extend # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE +102E0 ; Extend # Mn COPTIC EPACT THOUSANDS MARK +10376..1037A ; Extend # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII +10A01..10A03 ; Extend # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R +10A05..10A06 ; Extend # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O +10A0C..10A0F ; Extend # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA +10A38..10A3A ; Extend # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW +10A3F ; Extend # Mn KHAROSHTHI VIRAMA +10AE5..10AE6 ; Extend # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW +10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW +10F82..10F85 ; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW +11001 ; Extend # Mn BRAHMI SIGN ANUSVARA +11038..11046 ; Extend # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA +11070 ; Extend # Mn BRAHMI SIGN OLD TAMIL VIRAMA +11073..11074 ; Extend # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O +1107F..11081 ; Extend # Mn [3] BRAHMI NUMBER JOINER..KAITHI SIGN ANUSVARA +110B3..110B6 ; Extend # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI +110B9..110BA ; Extend # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +110C2 ; Extend # Mn KAITHI VOWEL SIGN VOCALIC R +11100..11102 ; Extend # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; Extend # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112D..11134 ; Extend # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11173 ; Extend # Mn MAHAJANI SIGN NUKTA +11180..11181 ; Extend # Mn [2] SHARADA SIGN 
CANDRABINDU..SHARADA SIGN ANUSVARA +111B6..111BE ; Extend # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111C9..111CC ; Extend # Mn [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK +111CF ; Extend # Mn SHARADA SIGN INVERTED CANDRABINDU +1122F..11231 ; Extend # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI +11234 ; Extend # Mn KHOJKI SIGN ANUSVARA +11236..11237 ; Extend # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA +1123E ; Extend # Mn KHOJKI SIGN SUKUN +112DF ; Extend # Mn KHUDAWADI SIGN ANUSVARA +112E3..112EA ; Extend # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA +11300..11301 ; Extend # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU +1133B..1133C ; Extend # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA +1133E ; Extend # Mc GRANTHA VOWEL SIGN AA +11340 ; Extend # Mn GRANTHA VOWEL SIGN II +11357 ; Extend # Mc GRANTHA AU LENGTH MARK +11366..1136C ; Extend # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX +11370..11374 ; Extend # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +11438..1143F ; Extend # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI +11442..11444 ; Extend # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA +11446 ; Extend # Mn NEWA SIGN NUKTA +1145E ; Extend # Mn NEWA SANDHI MARK +114B0 ; Extend # Mc TIRHUTA VOWEL SIGN AA +114B3..114B8 ; Extend # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL +114BA ; Extend # Mn TIRHUTA VOWEL SIGN SHORT E +114BD ; Extend # Mc TIRHUTA VOWEL SIGN SHORT O +114BF..114C0 ; Extend # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA +114C2..114C3 ; Extend # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA +115AF ; Extend # Mc SIDDHAM VOWEL SIGN AA +115B2..115B5 ; Extend # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR +115BC..115BD ; Extend # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA +115BF..115C0 ; Extend # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA +115DC..115DD ; Extend # Mn [2] SIDDHAM VOWEL SIGN 
ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU +11633..1163A ; Extend # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI +1163D ; Extend # Mn MODI SIGN ANUSVARA +1163F..11640 ; Extend # Mn [2] MODI SIGN VIRAMA..MODI SIGN ARDHACANDRA +116AB ; Extend # Mn TAKRI SIGN ANUSVARA +116AD ; Extend # Mn TAKRI VOWEL SIGN AA +116B0..116B5 ; Extend # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B7 ; Extend # Mn TAKRI SIGN NUKTA +1171D..1171F ; Extend # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +11722..11725 ; Extend # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU +11727..1172B ; Extend # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER +1182F..11837 ; Extend # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA +11839..1183A ; Extend # Mn [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA +11930 ; Extend # Mc DIVES AKURU VOWEL SIGN AA +1193B..1193C ; Extend # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU +1193E ; Extend # Mn DIVES AKURU VIRAMA +11943 ; Extend # Mn DIVES AKURU SIGN NUKTA +119D4..119D7 ; Extend # Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR +119DA..119DB ; Extend # Mn [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI +119E0 ; Extend # Mn NANDINAGARI SIGN VIRAMA +11A01..11A0A ; Extend # Mn [10] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL LENGTH MARK +11A33..11A38 ; Extend # Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA +11A3B..11A3E ; Extend # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA +11A47 ; Extend # Mn ZANABAZAR SQUARE SUBJOINER +11A51..11A56 ; Extend # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE +11A59..11A5B ; Extend # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK +11A8A..11A96 ; Extend # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA +11A98..11A99 ; Extend # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11C30..11C36 ; Extend # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI 
VOWEL SIGN VOCALIC L +11C38..11C3D ; Extend # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA +11C3F ; Extend # Mn BHAIKSUKI SIGN VIRAMA +11C92..11CA7 ; Extend # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA +11CAA..11CB0 ; Extend # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA +11CB2..11CB3 ; Extend # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E +11CB5..11CB6 ; Extend # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU +11D31..11D36 ; Extend # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R +11D3A ; Extend # Mn MASARAM GONDI VOWEL SIGN E +11D3C..11D3D ; Extend # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O +11D3F..11D45 ; Extend # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA +11D47 ; Extend # Mn MASARAM GONDI RA-KARA +11D90..11D91 ; Extend # Mn [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI +11D95 ; Extend # Mn GUNJALA GONDI SIGN ANUSVARA +11D97 ; Extend # Mn GUNJALA GONDI VIRAMA +11EF3..11EF4 ; Extend # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U +16AF0..16AF4 ; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE +16B30..16B36 ; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16F4F ; Extend # Mn MIAO SIGN CONSONANT MODIFIER BAR +16F8F..16F92 ; Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16FE4 ; Extend # Mn KHITAN SMALL SCRIPT FILLER +1BC9D..1BC9E ; Extend # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK +1CF00..1CF2D ; Extend # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT +1CF30..1CF46 ; Extend # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG +1D165 ; Extend # Mc MUSICAL SYMBOL COMBINING STEM +1D167..1D169 ; Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 +1D16E..1D172 ; Extend # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING 
FLAG-5 +1D17B..1D182 ; Extend # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE +1D185..1D18B ; Extend # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE +1D1AA..1D1AD ; Extend # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO +1D242..1D244 ; Extend # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME +1DA00..1DA36 ; Extend # Mn [55] SIGNWRITING HEAD RIM..SIGNWRITING AIR SUCKING IN +1DA3B..1DA6C ; Extend # Mn [50] SIGNWRITING MOUTH CLOSED NEUTRAL..SIGNWRITING EXCITEMENT +1DA75 ; Extend # Mn SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS +1DA84 ; Extend # Mn SIGNWRITING LOCATION HEAD NECK +1DA9B..1DA9F ; Extend # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6 +1DAA1..1DAAF ; Extend # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16 +1E000..1E006 ; Extend # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE +1E008..1E018 ; Extend # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU +1E01B..1E021 ; Extend # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI +1E023..1E024 ; Extend # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS +1E026..1E02A ; Extend # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA +1E130..1E136 ; Extend # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D +1E2AE ; Extend # Mn TOTO SIGN RISING TONE +1E2EC..1E2EF ; Extend # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI +1E8D0..1E8D6 ; Extend # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS +1E944..1E94A ; Extend # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA +1F3FB..1F3FF ; Extend # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6 +E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG +E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION 
SELECTOR-256 + +# Total code points: 2095 + +# ================================================ + +1F1E6..1F1FF ; Regional_Indicator # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z + +# Total code points: 26 + +# ================================================ + +0903 ; SpacingMark # Mc DEVANAGARI SIGN VISARGA +093B ; SpacingMark # Mc DEVANAGARI VOWEL SIGN OOE +093E..0940 ; SpacingMark # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II +0949..094C ; SpacingMark # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU +094E..094F ; SpacingMark # Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW +0982..0983 ; SpacingMark # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA +09BF..09C0 ; SpacingMark # Mc [2] BENGALI VOWEL SIGN I..BENGALI VOWEL SIGN II +09C7..09C8 ; SpacingMark # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI +09CB..09CC ; SpacingMark # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU +0A03 ; SpacingMark # Mc GURMUKHI SIGN VISARGA +0A3E..0A40 ; SpacingMark # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II +0A83 ; SpacingMark # Mc GUJARATI SIGN VISARGA +0ABE..0AC0 ; SpacingMark # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II +0AC9 ; SpacingMark # Mc GUJARATI VOWEL SIGN CANDRA O +0ACB..0ACC ; SpacingMark # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU +0B02..0B03 ; SpacingMark # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA +0B40 ; SpacingMark # Mc ORIYA VOWEL SIGN II +0B47..0B48 ; SpacingMark # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI +0B4B..0B4C ; SpacingMark # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU +0BBF ; SpacingMark # Mc TAMIL VOWEL SIGN I +0BC1..0BC2 ; SpacingMark # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU +0BC6..0BC8 ; SpacingMark # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI +0BCA..0BCC ; SpacingMark # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU +0C01..0C03 ; SpacingMark # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN 
VISARGA +0C41..0C44 ; SpacingMark # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR +0C82..0C83 ; SpacingMark # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA +0CBE ; SpacingMark # Mc KANNADA VOWEL SIGN AA +0CC0..0CC1 ; SpacingMark # Mc [2] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN U +0CC3..0CC4 ; SpacingMark # Mc [2] KANNADA VOWEL SIGN VOCALIC R..KANNADA VOWEL SIGN VOCALIC RR +0CC7..0CC8 ; SpacingMark # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI +0CCA..0CCB ; SpacingMark # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO +0D02..0D03 ; SpacingMark # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA +0D3F..0D40 ; SpacingMark # Mc [2] MALAYALAM VOWEL SIGN I..MALAYALAM VOWEL SIGN II +0D46..0D48 ; SpacingMark # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI +0D4A..0D4C ; SpacingMark # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU +0D82..0D83 ; SpacingMark # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA +0DD0..0DD1 ; SpacingMark # Mc [2] SINHALA VOWEL SIGN KETTI AEDA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA +0DD8..0DDE ; SpacingMark # Mc [7] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN KOMBUVA HAA GAYANUKITTA +0DF2..0DF3 ; SpacingMark # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA +0E33 ; SpacingMark # Lo THAI CHARACTER SARA AM +0EB3 ; SpacingMark # Lo LAO VOWEL SIGN AM +0F3E..0F3F ; SpacingMark # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES +0F7F ; SpacingMark # Mc TIBETAN SIGN RNAM BCAD +1031 ; SpacingMark # Mc MYANMAR VOWEL SIGN E +103B..103C ; SpacingMark # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA +1056..1057 ; SpacingMark # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR +1084 ; SpacingMark # Mc MYANMAR VOWEL SIGN SHAN E +1715 ; SpacingMark # Mc TAGALOG SIGN PAMUDPOD +1734 ; SpacingMark # Mc HANUNOO SIGN PAMUDPOD +17B6 ; SpacingMark # Mc KHMER VOWEL SIGN AA +17BE..17C5 ; SpacingMark # Mc [8] KHMER VOWEL SIGN 
OE..KHMER VOWEL SIGN AU +17C7..17C8 ; SpacingMark # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU +1923..1926 ; SpacingMark # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU +1929..192B ; SpacingMark # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA +1930..1931 ; SpacingMark # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA +1933..1938 ; SpacingMark # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA +1A19..1A1A ; SpacingMark # Mc [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O +1A55 ; SpacingMark # Mc TAI THAM CONSONANT SIGN MEDIAL RA +1A57 ; SpacingMark # Mc TAI THAM CONSONANT SIGN LA TANG LAI +1A6D..1A72 ; SpacingMark # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI +1B04 ; SpacingMark # Mc BALINESE SIGN BISAH +1B3B ; SpacingMark # Mc BALINESE VOWEL SIGN RA REPA TEDUNG +1B3D..1B41 ; SpacingMark # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG +1B43..1B44 ; SpacingMark # Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG +1B82 ; SpacingMark # Mc SUNDANESE SIGN PANGWISAD +1BA1 ; SpacingMark # Mc SUNDANESE CONSONANT SIGN PAMINGKAL +1BA6..1BA7 ; SpacingMark # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG +1BAA ; SpacingMark # Mc SUNDANESE SIGN PAMAAEH +1BE7 ; SpacingMark # Mc BATAK VOWEL SIGN E +1BEA..1BEC ; SpacingMark # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O +1BEE ; SpacingMark # Mc BATAK VOWEL SIGN U +1BF2..1BF3 ; SpacingMark # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN +1C24..1C2B ; SpacingMark # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU +1C34..1C35 ; SpacingMark # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG +1CE1 ; SpacingMark # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA +1CF7 ; SpacingMark # Mc VEDIC SIGN ATIKRAMA +A823..A824 ; SpacingMark # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I +A827 ; SpacingMark # Mc SYLOTI NAGRI VOWEL SIGN OO +A880..A881 ; SpacingMark # Mc [2] SAURASHTRA SIGN 
ANUSVARA..SAURASHTRA SIGN VISARGA +A8B4..A8C3 ; SpacingMark # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU +A952..A953 ; SpacingMark # Mc [2] REJANG CONSONANT SIGN H..REJANG VIRAMA +A983 ; SpacingMark # Mc JAVANESE SIGN WIGNYAN +A9B4..A9B5 ; SpacingMark # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG +A9BA..A9BB ; SpacingMark # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE +A9BE..A9C0 ; SpacingMark # Mc [3] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE PANGKON +AA2F..AA30 ; SpacingMark # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI +AA33..AA34 ; SpacingMark # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA +AA4D ; SpacingMark # Mc CHAM CONSONANT SIGN FINAL H +AAEB ; SpacingMark # Mc MEETEI MAYEK VOWEL SIGN II +AAEE..AAEF ; SpacingMark # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF5 ; SpacingMark # Mc MEETEI MAYEK VOWEL SIGN VISARGA +ABE3..ABE4 ; SpacingMark # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP +ABE6..ABE7 ; SpacingMark # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP +ABE9..ABEA ; SpacingMark # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG +ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK +11000 ; SpacingMark # Mc BRAHMI SIGN CANDRABINDU +11002 ; SpacingMark # Mc BRAHMI SIGN VISARGA +11082 ; SpacingMark # Mc KAITHI SIGN VISARGA +110B0..110B2 ; SpacingMark # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II +110B7..110B8 ; SpacingMark # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +1112C ; SpacingMark # Mc CHAKMA VOWEL SIGN E +11145..11146 ; SpacingMark # Mc [2] CHAKMA VOWEL SIGN AA..CHAKMA VOWEL SIGN EI +11182 ; SpacingMark # Mc SHARADA SIGN VISARGA +111B3..111B5 ; SpacingMark # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111BF..111C0 ; SpacingMark # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +111CE ; SpacingMark # Mc SHARADA VOWEL SIGN PRISHTHAMATRA E +1122C..1122E ; SpacingMark # Mc [3] 
KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II +11232..11233 ; SpacingMark # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU +11235 ; SpacingMark # Mc KHOJKI SIGN VIRAMA +112E0..112E2 ; SpacingMark # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II +11302..11303 ; SpacingMark # Mc [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA +1133F ; SpacingMark # Mc GRANTHA VOWEL SIGN I +11341..11344 ; SpacingMark # Mc [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR +11347..11348 ; SpacingMark # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI +1134B..1134D ; SpacingMark # Mc [3] GRANTHA VOWEL SIGN OO..GRANTHA SIGN VIRAMA +11362..11363 ; SpacingMark # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +11435..11437 ; SpacingMark # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II +11440..11441 ; SpacingMark # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU +11445 ; SpacingMark # Mc NEWA SIGN VISARGA +114B1..114B2 ; SpacingMark # Mc [2] TIRHUTA VOWEL SIGN I..TIRHUTA VOWEL SIGN II +114B9 ; SpacingMark # Mc TIRHUTA VOWEL SIGN E +114BB..114BC ; SpacingMark # Mc [2] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN O +114BE ; SpacingMark # Mc TIRHUTA VOWEL SIGN AU +114C1 ; SpacingMark # Mc TIRHUTA SIGN VISARGA +115B0..115B1 ; SpacingMark # Mc [2] SIDDHAM VOWEL SIGN I..SIDDHAM VOWEL SIGN II +115B8..115BB ; SpacingMark # Mc [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU +115BE ; SpacingMark # Mc SIDDHAM SIGN VISARGA +11630..11632 ; SpacingMark # Mc [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II +1163B..1163C ; SpacingMark # Mc [2] MODI VOWEL SIGN O..MODI VOWEL SIGN AU +1163E ; SpacingMark # Mc MODI SIGN VISARGA +116AC ; SpacingMark # Mc TAKRI SIGN VISARGA +116AE..116AF ; SpacingMark # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B6 ; SpacingMark # Mc TAKRI SIGN VIRAMA +11726 ; SpacingMark # Mc AHOM VOWEL SIGN E +1182C..1182E ; SpacingMark # Mc [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II +11838 ; SpacingMark # Mc DOGRA SIGN VISARGA +11931..11935 ; SpacingMark # Mc 
[5] DIVES AKURU VOWEL SIGN I..DIVES AKURU VOWEL SIGN E +11937..11938 ; SpacingMark # Mc [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O +1193D ; SpacingMark # Mc DIVES AKURU SIGN HALANTA +11940 ; SpacingMark # Mc DIVES AKURU MEDIAL YA +11942 ; SpacingMark # Mc DIVES AKURU MEDIAL RA +119D1..119D3 ; SpacingMark # Mc [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II +119DC..119DF ; SpacingMark # Mc [4] NANDINAGARI VOWEL SIGN O..NANDINAGARI SIGN VISARGA +119E4 ; SpacingMark # Mc NANDINAGARI VOWEL SIGN PRISHTHAMATRA E +11A39 ; SpacingMark # Mc ZANABAZAR SQUARE SIGN VISARGA +11A57..11A58 ; SpacingMark # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU +11A97 ; SpacingMark # Mc SOYOMBO SIGN VISARGA +11C2F ; SpacingMark # Mc BHAIKSUKI VOWEL SIGN AA +11C3E ; SpacingMark # Mc BHAIKSUKI SIGN VISARGA +11CA9 ; SpacingMark # Mc MARCHEN SUBJOINED LETTER YA +11CB1 ; SpacingMark # Mc MARCHEN VOWEL SIGN I +11CB4 ; SpacingMark # Mc MARCHEN VOWEL SIGN O +11D8A..11D8E ; SpacingMark # Mc [5] GUNJALA GONDI VOWEL SIGN AA..GUNJALA GONDI VOWEL SIGN UU +11D93..11D94 ; SpacingMark # Mc [2] GUNJALA GONDI VOWEL SIGN OO..GUNJALA GONDI VOWEL SIGN AU +11D96 ; SpacingMark # Mc GUNJALA GONDI SIGN VISARGA +11EF5..11EF6 ; SpacingMark # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O +16F51..16F87 ; SpacingMark # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI +16FF0..16FF1 ; SpacingMark # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +1D166 ; SpacingMark # Mc MUSICAL SYMBOL COMBINING SPRECHGESANG STEM +1D16D ; SpacingMark # Mc MUSICAL SYMBOL COMBINING AUGMENTATION DOT + +# Total code points: 388 + +# ================================================ + +1100..115F ; L # Lo [96] HANGUL CHOSEONG KIYEOK..HANGUL CHOSEONG FILLER +A960..A97C ; L # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH + +# Total code points: 125 + +# ================================================ + +1160..11A7 ; V # Lo [72] HANGUL JUNGSEONG 
FILLER..HANGUL JUNGSEONG O-YAE +D7B0..D7C6 ; V # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E + +# Total code points: 95 + +# ================================================ + +11A8..11FF ; T # Lo [88] HANGUL JONGSEONG KIYEOK..HANGUL JONGSEONG SSANGNIEUN +D7CB..D7FB ; T # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH + +# Total code points: 137 + +# ================================================ + +AC00 ; LV # Lo HANGUL SYLLABLE GA +AC1C ; LV # Lo HANGUL SYLLABLE GAE +AC38 ; LV # Lo HANGUL SYLLABLE GYA +AC54 ; LV # Lo HANGUL SYLLABLE GYAE +AC70 ; LV # Lo HANGUL SYLLABLE GEO +AC8C ; LV # Lo HANGUL SYLLABLE GE +ACA8 ; LV # Lo HANGUL SYLLABLE GYEO +ACC4 ; LV # Lo HANGUL SYLLABLE GYE +ACE0 ; LV # Lo HANGUL SYLLABLE GO +ACFC ; LV # Lo HANGUL SYLLABLE GWA +AD18 ; LV # Lo HANGUL SYLLABLE GWAE +AD34 ; LV # Lo HANGUL SYLLABLE GOE +AD50 ; LV # Lo HANGUL SYLLABLE GYO +AD6C ; LV # Lo HANGUL SYLLABLE GU +AD88 ; LV # Lo HANGUL SYLLABLE GWEO +ADA4 ; LV # Lo HANGUL SYLLABLE GWE +ADC0 ; LV # Lo HANGUL SYLLABLE GWI +ADDC ; LV # Lo HANGUL SYLLABLE GYU +ADF8 ; LV # Lo HANGUL SYLLABLE GEU +AE14 ; LV # Lo HANGUL SYLLABLE GYI +AE30 ; LV # Lo HANGUL SYLLABLE GI +AE4C ; LV # Lo HANGUL SYLLABLE GGA +AE68 ; LV # Lo HANGUL SYLLABLE GGAE +AE84 ; LV # Lo HANGUL SYLLABLE GGYA +AEA0 ; LV # Lo HANGUL SYLLABLE GGYAE +AEBC ; LV # Lo HANGUL SYLLABLE GGEO +AED8 ; LV # Lo HANGUL SYLLABLE GGE +AEF4 ; LV # Lo HANGUL SYLLABLE GGYEO +AF10 ; LV # Lo HANGUL SYLLABLE GGYE +AF2C ; LV # Lo HANGUL SYLLABLE GGO +AF48 ; LV # Lo HANGUL SYLLABLE GGWA +AF64 ; LV # Lo HANGUL SYLLABLE GGWAE +AF80 ; LV # Lo HANGUL SYLLABLE GGOE +AF9C ; LV # Lo HANGUL SYLLABLE GGYO +AFB8 ; LV # Lo HANGUL SYLLABLE GGU +AFD4 ; LV # Lo HANGUL SYLLABLE GGWEO +AFF0 ; LV # Lo HANGUL SYLLABLE GGWE +B00C ; LV # Lo HANGUL SYLLABLE GGWI +B028 ; LV # Lo HANGUL SYLLABLE GGYU +B044 ; LV # Lo HANGUL SYLLABLE GGEU +B060 ; LV # Lo HANGUL SYLLABLE GGYI +B07C ; LV # Lo HANGUL SYLLABLE GGI +B098 ; LV # Lo HANGUL 
SYLLABLE NA +B0B4 ; LV # Lo HANGUL SYLLABLE NAE +B0D0 ; LV # Lo HANGUL SYLLABLE NYA +B0EC ; LV # Lo HANGUL SYLLABLE NYAE +B108 ; LV # Lo HANGUL SYLLABLE NEO +B124 ; LV # Lo HANGUL SYLLABLE NE +B140 ; LV # Lo HANGUL SYLLABLE NYEO +B15C ; LV # Lo HANGUL SYLLABLE NYE +B178 ; LV # Lo HANGUL SYLLABLE NO +B194 ; LV # Lo HANGUL SYLLABLE NWA +B1B0 ; LV # Lo HANGUL SYLLABLE NWAE +B1CC ; LV # Lo HANGUL SYLLABLE NOE +B1E8 ; LV # Lo HANGUL SYLLABLE NYO +B204 ; LV # Lo HANGUL SYLLABLE NU +B220 ; LV # Lo HANGUL SYLLABLE NWEO +B23C ; LV # Lo HANGUL SYLLABLE NWE +B258 ; LV # Lo HANGUL SYLLABLE NWI +B274 ; LV # Lo HANGUL SYLLABLE NYU +B290 ; LV # Lo HANGUL SYLLABLE NEU +B2AC ; LV # Lo HANGUL SYLLABLE NYI +B2C8 ; LV # Lo HANGUL SYLLABLE NI +B2E4 ; LV # Lo HANGUL SYLLABLE DA +B300 ; LV # Lo HANGUL SYLLABLE DAE +B31C ; LV # Lo HANGUL SYLLABLE DYA +B338 ; LV # Lo HANGUL SYLLABLE DYAE +B354 ; LV # Lo HANGUL SYLLABLE DEO +B370 ; LV # Lo HANGUL SYLLABLE DE +B38C ; LV # Lo HANGUL SYLLABLE DYEO +B3A8 ; LV # Lo HANGUL SYLLABLE DYE +B3C4 ; LV # Lo HANGUL SYLLABLE DO +B3E0 ; LV # Lo HANGUL SYLLABLE DWA +B3FC ; LV # Lo HANGUL SYLLABLE DWAE +B418 ; LV # Lo HANGUL SYLLABLE DOE +B434 ; LV # Lo HANGUL SYLLABLE DYO +B450 ; LV # Lo HANGUL SYLLABLE DU +B46C ; LV # Lo HANGUL SYLLABLE DWEO +B488 ; LV # Lo HANGUL SYLLABLE DWE +B4A4 ; LV # Lo HANGUL SYLLABLE DWI +B4C0 ; LV # Lo HANGUL SYLLABLE DYU +B4DC ; LV # Lo HANGUL SYLLABLE DEU +B4F8 ; LV # Lo HANGUL SYLLABLE DYI +B514 ; LV # Lo HANGUL SYLLABLE DI +B530 ; LV # Lo HANGUL SYLLABLE DDA +B54C ; LV # Lo HANGUL SYLLABLE DDAE +B568 ; LV # Lo HANGUL SYLLABLE DDYA +B584 ; LV # Lo HANGUL SYLLABLE DDYAE +B5A0 ; LV # Lo HANGUL SYLLABLE DDEO +B5BC ; LV # Lo HANGUL SYLLABLE DDE +B5D8 ; LV # Lo HANGUL SYLLABLE DDYEO +B5F4 ; LV # Lo HANGUL SYLLABLE DDYE +B610 ; LV # Lo HANGUL SYLLABLE DDO +B62C ; LV # Lo HANGUL SYLLABLE DDWA +B648 ; LV # Lo HANGUL SYLLABLE DDWAE +B664 ; LV # Lo HANGUL SYLLABLE DDOE +B680 ; LV # Lo HANGUL SYLLABLE DDYO +B69C ; LV # Lo HANGUL SYLLABLE 
DDU +B6B8 ; LV # Lo HANGUL SYLLABLE DDWEO +B6D4 ; LV # Lo HANGUL SYLLABLE DDWE +B6F0 ; LV # Lo HANGUL SYLLABLE DDWI +B70C ; LV # Lo HANGUL SYLLABLE DDYU +B728 ; LV # Lo HANGUL SYLLABLE DDEU +B744 ; LV # Lo HANGUL SYLLABLE DDYI +B760 ; LV # Lo HANGUL SYLLABLE DDI +B77C ; LV # Lo HANGUL SYLLABLE RA +B798 ; LV # Lo HANGUL SYLLABLE RAE +B7B4 ; LV # Lo HANGUL SYLLABLE RYA +B7D0 ; LV # Lo HANGUL SYLLABLE RYAE +B7EC ; LV # Lo HANGUL SYLLABLE REO +B808 ; LV # Lo HANGUL SYLLABLE RE +B824 ; LV # Lo HANGUL SYLLABLE RYEO +B840 ; LV # Lo HANGUL SYLLABLE RYE +B85C ; LV # Lo HANGUL SYLLABLE RO +B878 ; LV # Lo HANGUL SYLLABLE RWA +B894 ; LV # Lo HANGUL SYLLABLE RWAE +B8B0 ; LV # Lo HANGUL SYLLABLE ROE +B8CC ; LV # Lo HANGUL SYLLABLE RYO +B8E8 ; LV # Lo HANGUL SYLLABLE RU +B904 ; LV # Lo HANGUL SYLLABLE RWEO +B920 ; LV # Lo HANGUL SYLLABLE RWE +B93C ; LV # Lo HANGUL SYLLABLE RWI +B958 ; LV # Lo HANGUL SYLLABLE RYU +B974 ; LV # Lo HANGUL SYLLABLE REU +B990 ; LV # Lo HANGUL SYLLABLE RYI +B9AC ; LV # Lo HANGUL SYLLABLE RI +B9C8 ; LV # Lo HANGUL SYLLABLE MA +B9E4 ; LV # Lo HANGUL SYLLABLE MAE +BA00 ; LV # Lo HANGUL SYLLABLE MYA +BA1C ; LV # Lo HANGUL SYLLABLE MYAE +BA38 ; LV # Lo HANGUL SYLLABLE MEO +BA54 ; LV # Lo HANGUL SYLLABLE ME +BA70 ; LV # Lo HANGUL SYLLABLE MYEO +BA8C ; LV # Lo HANGUL SYLLABLE MYE +BAA8 ; LV # Lo HANGUL SYLLABLE MO +BAC4 ; LV # Lo HANGUL SYLLABLE MWA +BAE0 ; LV # Lo HANGUL SYLLABLE MWAE +BAFC ; LV # Lo HANGUL SYLLABLE MOE +BB18 ; LV # Lo HANGUL SYLLABLE MYO +BB34 ; LV # Lo HANGUL SYLLABLE MU +BB50 ; LV # Lo HANGUL SYLLABLE MWEO +BB6C ; LV # Lo HANGUL SYLLABLE MWE +BB88 ; LV # Lo HANGUL SYLLABLE MWI +BBA4 ; LV # Lo HANGUL SYLLABLE MYU +BBC0 ; LV # Lo HANGUL SYLLABLE MEU +BBDC ; LV # Lo HANGUL SYLLABLE MYI +BBF8 ; LV # Lo HANGUL SYLLABLE MI +BC14 ; LV # Lo HANGUL SYLLABLE BA +BC30 ; LV # Lo HANGUL SYLLABLE BAE +BC4C ; LV # Lo HANGUL SYLLABLE BYA +BC68 ; LV # Lo HANGUL SYLLABLE BYAE +BC84 ; LV # Lo HANGUL SYLLABLE BEO +BCA0 ; LV # Lo HANGUL SYLLABLE BE +BCBC ; LV 
# Lo HANGUL SYLLABLE BYEO +BCD8 ; LV # Lo HANGUL SYLLABLE BYE +BCF4 ; LV # Lo HANGUL SYLLABLE BO +BD10 ; LV # Lo HANGUL SYLLABLE BWA +BD2C ; LV # Lo HANGUL SYLLABLE BWAE +BD48 ; LV # Lo HANGUL SYLLABLE BOE +BD64 ; LV # Lo HANGUL SYLLABLE BYO +BD80 ; LV # Lo HANGUL SYLLABLE BU +BD9C ; LV # Lo HANGUL SYLLABLE BWEO +BDB8 ; LV # Lo HANGUL SYLLABLE BWE +BDD4 ; LV # Lo HANGUL SYLLABLE BWI +BDF0 ; LV # Lo HANGUL SYLLABLE BYU +BE0C ; LV # Lo HANGUL SYLLABLE BEU +BE28 ; LV # Lo HANGUL SYLLABLE BYI +BE44 ; LV # Lo HANGUL SYLLABLE BI +BE60 ; LV # Lo HANGUL SYLLABLE BBA +BE7C ; LV # Lo HANGUL SYLLABLE BBAE +BE98 ; LV # Lo HANGUL SYLLABLE BBYA +BEB4 ; LV # Lo HANGUL SYLLABLE BBYAE +BED0 ; LV # Lo HANGUL SYLLABLE BBEO +BEEC ; LV # Lo HANGUL SYLLABLE BBE +BF08 ; LV # Lo HANGUL SYLLABLE BBYEO +BF24 ; LV # Lo HANGUL SYLLABLE BBYE +BF40 ; LV # Lo HANGUL SYLLABLE BBO +BF5C ; LV # Lo HANGUL SYLLABLE BBWA +BF78 ; LV # Lo HANGUL SYLLABLE BBWAE +BF94 ; LV # Lo HANGUL SYLLABLE BBOE +BFB0 ; LV # Lo HANGUL SYLLABLE BBYO +BFCC ; LV # Lo HANGUL SYLLABLE BBU +BFE8 ; LV # Lo HANGUL SYLLABLE BBWEO +C004 ; LV # Lo HANGUL SYLLABLE BBWE +C020 ; LV # Lo HANGUL SYLLABLE BBWI +C03C ; LV # Lo HANGUL SYLLABLE BBYU +C058 ; LV # Lo HANGUL SYLLABLE BBEU +C074 ; LV # Lo HANGUL SYLLABLE BBYI +C090 ; LV # Lo HANGUL SYLLABLE BBI +C0AC ; LV # Lo HANGUL SYLLABLE SA +C0C8 ; LV # Lo HANGUL SYLLABLE SAE +C0E4 ; LV # Lo HANGUL SYLLABLE SYA +C100 ; LV # Lo HANGUL SYLLABLE SYAE +C11C ; LV # Lo HANGUL SYLLABLE SEO +C138 ; LV # Lo HANGUL SYLLABLE SE +C154 ; LV # Lo HANGUL SYLLABLE SYEO +C170 ; LV # Lo HANGUL SYLLABLE SYE +C18C ; LV # Lo HANGUL SYLLABLE SO +C1A8 ; LV # Lo HANGUL SYLLABLE SWA +C1C4 ; LV # Lo HANGUL SYLLABLE SWAE +C1E0 ; LV # Lo HANGUL SYLLABLE SOE +C1FC ; LV # Lo HANGUL SYLLABLE SYO +C218 ; LV # Lo HANGUL SYLLABLE SU +C234 ; LV # Lo HANGUL SYLLABLE SWEO +C250 ; LV # Lo HANGUL SYLLABLE SWE +C26C ; LV # Lo HANGUL SYLLABLE SWI +C288 ; LV # Lo HANGUL SYLLABLE SYU +C2A4 ; LV # Lo HANGUL SYLLABLE SEU +C2C0 ; LV 
# Lo HANGUL SYLLABLE SYI +C2DC ; LV # Lo HANGUL SYLLABLE SI +C2F8 ; LV # Lo HANGUL SYLLABLE SSA +C314 ; LV # Lo HANGUL SYLLABLE SSAE +C330 ; LV # Lo HANGUL SYLLABLE SSYA +C34C ; LV # Lo HANGUL SYLLABLE SSYAE +C368 ; LV # Lo HANGUL SYLLABLE SSEO +C384 ; LV # Lo HANGUL SYLLABLE SSE +C3A0 ; LV # Lo HANGUL SYLLABLE SSYEO +C3BC ; LV # Lo HANGUL SYLLABLE SSYE +C3D8 ; LV # Lo HANGUL SYLLABLE SSO +C3F4 ; LV # Lo HANGUL SYLLABLE SSWA +C410 ; LV # Lo HANGUL SYLLABLE SSWAE +C42C ; LV # Lo HANGUL SYLLABLE SSOE +C448 ; LV # Lo HANGUL SYLLABLE SSYO +C464 ; LV # Lo HANGUL SYLLABLE SSU +C480 ; LV # Lo HANGUL SYLLABLE SSWEO +C49C ; LV # Lo HANGUL SYLLABLE SSWE +C4B8 ; LV # Lo HANGUL SYLLABLE SSWI +C4D4 ; LV # Lo HANGUL SYLLABLE SSYU +C4F0 ; LV # Lo HANGUL SYLLABLE SSEU +C50C ; LV # Lo HANGUL SYLLABLE SSYI +C528 ; LV # Lo HANGUL SYLLABLE SSI +C544 ; LV # Lo HANGUL SYLLABLE A +C560 ; LV # Lo HANGUL SYLLABLE AE +C57C ; LV # Lo HANGUL SYLLABLE YA +C598 ; LV # Lo HANGUL SYLLABLE YAE +C5B4 ; LV # Lo HANGUL SYLLABLE EO +C5D0 ; LV # Lo HANGUL SYLLABLE E +C5EC ; LV # Lo HANGUL SYLLABLE YEO +C608 ; LV # Lo HANGUL SYLLABLE YE +C624 ; LV # Lo HANGUL SYLLABLE O +C640 ; LV # Lo HANGUL SYLLABLE WA +C65C ; LV # Lo HANGUL SYLLABLE WAE +C678 ; LV # Lo HANGUL SYLLABLE OE +C694 ; LV # Lo HANGUL SYLLABLE YO +C6B0 ; LV # Lo HANGUL SYLLABLE U +C6CC ; LV # Lo HANGUL SYLLABLE WEO +C6E8 ; LV # Lo HANGUL SYLLABLE WE +C704 ; LV # Lo HANGUL SYLLABLE WI +C720 ; LV # Lo HANGUL SYLLABLE YU +C73C ; LV # Lo HANGUL SYLLABLE EU +C758 ; LV # Lo HANGUL SYLLABLE YI +C774 ; LV # Lo HANGUL SYLLABLE I +C790 ; LV # Lo HANGUL SYLLABLE JA +C7AC ; LV # Lo HANGUL SYLLABLE JAE +C7C8 ; LV # Lo HANGUL SYLLABLE JYA +C7E4 ; LV # Lo HANGUL SYLLABLE JYAE +C800 ; LV # Lo HANGUL SYLLABLE JEO +C81C ; LV # Lo HANGUL SYLLABLE JE +C838 ; LV # Lo HANGUL SYLLABLE JYEO +C854 ; LV # Lo HANGUL SYLLABLE JYE +C870 ; LV # Lo HANGUL SYLLABLE JO +C88C ; LV # Lo HANGUL SYLLABLE JWA +C8A8 ; LV # Lo HANGUL SYLLABLE JWAE +C8C4 ; LV # Lo HANGUL SYLLABLE 
JOE +C8E0 ; LV # Lo HANGUL SYLLABLE JYO +C8FC ; LV # Lo HANGUL SYLLABLE JU +C918 ; LV # Lo HANGUL SYLLABLE JWEO +C934 ; LV # Lo HANGUL SYLLABLE JWE +C950 ; LV # Lo HANGUL SYLLABLE JWI +C96C ; LV # Lo HANGUL SYLLABLE JYU +C988 ; LV # Lo HANGUL SYLLABLE JEU +C9A4 ; LV # Lo HANGUL SYLLABLE JYI +C9C0 ; LV # Lo HANGUL SYLLABLE JI +C9DC ; LV # Lo HANGUL SYLLABLE JJA +C9F8 ; LV # Lo HANGUL SYLLABLE JJAE +CA14 ; LV # Lo HANGUL SYLLABLE JJYA +CA30 ; LV # Lo HANGUL SYLLABLE JJYAE +CA4C ; LV # Lo HANGUL SYLLABLE JJEO +CA68 ; LV # Lo HANGUL SYLLABLE JJE +CA84 ; LV # Lo HANGUL SYLLABLE JJYEO +CAA0 ; LV # Lo HANGUL SYLLABLE JJYE +CABC ; LV # Lo HANGUL SYLLABLE JJO +CAD8 ; LV # Lo HANGUL SYLLABLE JJWA +CAF4 ; LV # Lo HANGUL SYLLABLE JJWAE +CB10 ; LV # Lo HANGUL SYLLABLE JJOE +CB2C ; LV # Lo HANGUL SYLLABLE JJYO +CB48 ; LV # Lo HANGUL SYLLABLE JJU +CB64 ; LV # Lo HANGUL SYLLABLE JJWEO +CB80 ; LV # Lo HANGUL SYLLABLE JJWE +CB9C ; LV # Lo HANGUL SYLLABLE JJWI +CBB8 ; LV # Lo HANGUL SYLLABLE JJYU +CBD4 ; LV # Lo HANGUL SYLLABLE JJEU +CBF0 ; LV # Lo HANGUL SYLLABLE JJYI +CC0C ; LV # Lo HANGUL SYLLABLE JJI +CC28 ; LV # Lo HANGUL SYLLABLE CA +CC44 ; LV # Lo HANGUL SYLLABLE CAE +CC60 ; LV # Lo HANGUL SYLLABLE CYA +CC7C ; LV # Lo HANGUL SYLLABLE CYAE +CC98 ; LV # Lo HANGUL SYLLABLE CEO +CCB4 ; LV # Lo HANGUL SYLLABLE CE +CCD0 ; LV # Lo HANGUL SYLLABLE CYEO +CCEC ; LV # Lo HANGUL SYLLABLE CYE +CD08 ; LV # Lo HANGUL SYLLABLE CO +CD24 ; LV # Lo HANGUL SYLLABLE CWA +CD40 ; LV # Lo HANGUL SYLLABLE CWAE +CD5C ; LV # Lo HANGUL SYLLABLE COE +CD78 ; LV # Lo HANGUL SYLLABLE CYO +CD94 ; LV # Lo HANGUL SYLLABLE CU +CDB0 ; LV # Lo HANGUL SYLLABLE CWEO +CDCC ; LV # Lo HANGUL SYLLABLE CWE +CDE8 ; LV # Lo HANGUL SYLLABLE CWI +CE04 ; LV # Lo HANGUL SYLLABLE CYU +CE20 ; LV # Lo HANGUL SYLLABLE CEU +CE3C ; LV # Lo HANGUL SYLLABLE CYI +CE58 ; LV # Lo HANGUL SYLLABLE CI +CE74 ; LV # Lo HANGUL SYLLABLE KA +CE90 ; LV # Lo HANGUL SYLLABLE KAE +CEAC ; LV # Lo HANGUL SYLLABLE KYA +CEC8 ; LV # Lo HANGUL SYLLABLE 
KYAE +CEE4 ; LV # Lo HANGUL SYLLABLE KEO +CF00 ; LV # Lo HANGUL SYLLABLE KE +CF1C ; LV # Lo HANGUL SYLLABLE KYEO +CF38 ; LV # Lo HANGUL SYLLABLE KYE +CF54 ; LV # Lo HANGUL SYLLABLE KO +CF70 ; LV # Lo HANGUL SYLLABLE KWA +CF8C ; LV # Lo HANGUL SYLLABLE KWAE +CFA8 ; LV # Lo HANGUL SYLLABLE KOE +CFC4 ; LV # Lo HANGUL SYLLABLE KYO +CFE0 ; LV # Lo HANGUL SYLLABLE KU +CFFC ; LV # Lo HANGUL SYLLABLE KWEO +D018 ; LV # Lo HANGUL SYLLABLE KWE +D034 ; LV # Lo HANGUL SYLLABLE KWI +D050 ; LV # Lo HANGUL SYLLABLE KYU +D06C ; LV # Lo HANGUL SYLLABLE KEU +D088 ; LV # Lo HANGUL SYLLABLE KYI +D0A4 ; LV # Lo HANGUL SYLLABLE KI +D0C0 ; LV # Lo HANGUL SYLLABLE TA +D0DC ; LV # Lo HANGUL SYLLABLE TAE +D0F8 ; LV # Lo HANGUL SYLLABLE TYA +D114 ; LV # Lo HANGUL SYLLABLE TYAE +D130 ; LV # Lo HANGUL SYLLABLE TEO +D14C ; LV # Lo HANGUL SYLLABLE TE +D168 ; LV # Lo HANGUL SYLLABLE TYEO +D184 ; LV # Lo HANGUL SYLLABLE TYE +D1A0 ; LV # Lo HANGUL SYLLABLE TO +D1BC ; LV # Lo HANGUL SYLLABLE TWA +D1D8 ; LV # Lo HANGUL SYLLABLE TWAE +D1F4 ; LV # Lo HANGUL SYLLABLE TOE +D210 ; LV # Lo HANGUL SYLLABLE TYO +D22C ; LV # Lo HANGUL SYLLABLE TU +D248 ; LV # Lo HANGUL SYLLABLE TWEO +D264 ; LV # Lo HANGUL SYLLABLE TWE +D280 ; LV # Lo HANGUL SYLLABLE TWI +D29C ; LV # Lo HANGUL SYLLABLE TYU +D2B8 ; LV # Lo HANGUL SYLLABLE TEU +D2D4 ; LV # Lo HANGUL SYLLABLE TYI +D2F0 ; LV # Lo HANGUL SYLLABLE TI +D30C ; LV # Lo HANGUL SYLLABLE PA +D328 ; LV # Lo HANGUL SYLLABLE PAE +D344 ; LV # Lo HANGUL SYLLABLE PYA +D360 ; LV # Lo HANGUL SYLLABLE PYAE +D37C ; LV # Lo HANGUL SYLLABLE PEO +D398 ; LV # Lo HANGUL SYLLABLE PE +D3B4 ; LV # Lo HANGUL SYLLABLE PYEO +D3D0 ; LV # Lo HANGUL SYLLABLE PYE +D3EC ; LV # Lo HANGUL SYLLABLE PO +D408 ; LV # Lo HANGUL SYLLABLE PWA +D424 ; LV # Lo HANGUL SYLLABLE PWAE +D440 ; LV # Lo HANGUL SYLLABLE POE +D45C ; LV # Lo HANGUL SYLLABLE PYO +D478 ; LV # Lo HANGUL SYLLABLE PU +D494 ; LV # Lo HANGUL SYLLABLE PWEO +D4B0 ; LV # Lo HANGUL SYLLABLE PWE +D4CC ; LV # Lo HANGUL SYLLABLE PWI +D4E8 ; LV # Lo 
HANGUL SYLLABLE PYU +D504 ; LV # Lo HANGUL SYLLABLE PEU +D520 ; LV # Lo HANGUL SYLLABLE PYI +D53C ; LV # Lo HANGUL SYLLABLE PI +D558 ; LV # Lo HANGUL SYLLABLE HA +D574 ; LV # Lo HANGUL SYLLABLE HAE +D590 ; LV # Lo HANGUL SYLLABLE HYA +D5AC ; LV # Lo HANGUL SYLLABLE HYAE +D5C8 ; LV # Lo HANGUL SYLLABLE HEO +D5E4 ; LV # Lo HANGUL SYLLABLE HE +D600 ; LV # Lo HANGUL SYLLABLE HYEO +D61C ; LV # Lo HANGUL SYLLABLE HYE +D638 ; LV # Lo HANGUL SYLLABLE HO +D654 ; LV # Lo HANGUL SYLLABLE HWA +D670 ; LV # Lo HANGUL SYLLABLE HWAE +D68C ; LV # Lo HANGUL SYLLABLE HOE +D6A8 ; LV # Lo HANGUL SYLLABLE HYO +D6C4 ; LV # Lo HANGUL SYLLABLE HU +D6E0 ; LV # Lo HANGUL SYLLABLE HWEO +D6FC ; LV # Lo HANGUL SYLLABLE HWE +D718 ; LV # Lo HANGUL SYLLABLE HWI +D734 ; LV # Lo HANGUL SYLLABLE HYU +D750 ; LV # Lo HANGUL SYLLABLE HEU +D76C ; LV # Lo HANGUL SYLLABLE HYI +D788 ; LV # Lo HANGUL SYLLABLE HI + +# Total code points: 399 + +# ================================================ + +AC01..AC1B ; LVT # Lo [27] HANGUL SYLLABLE GAG..HANGUL SYLLABLE GAH +AC1D..AC37 ; LVT # Lo [27] HANGUL SYLLABLE GAEG..HANGUL SYLLABLE GAEH +AC39..AC53 ; LVT # Lo [27] HANGUL SYLLABLE GYAG..HANGUL SYLLABLE GYAH +AC55..AC6F ; LVT # Lo [27] HANGUL SYLLABLE GYAEG..HANGUL SYLLABLE GYAEH +AC71..AC8B ; LVT # Lo [27] HANGUL SYLLABLE GEOG..HANGUL SYLLABLE GEOH +AC8D..ACA7 ; LVT # Lo [27] HANGUL SYLLABLE GEG..HANGUL SYLLABLE GEH +ACA9..ACC3 ; LVT # Lo [27] HANGUL SYLLABLE GYEOG..HANGUL SYLLABLE GYEOH +ACC5..ACDF ; LVT # Lo [27] HANGUL SYLLABLE GYEG..HANGUL SYLLABLE GYEH +ACE1..ACFB ; LVT # Lo [27] HANGUL SYLLABLE GOG..HANGUL SYLLABLE GOH +ACFD..AD17 ; LVT # Lo [27] HANGUL SYLLABLE GWAG..HANGUL SYLLABLE GWAH +AD19..AD33 ; LVT # Lo [27] HANGUL SYLLABLE GWAEG..HANGUL SYLLABLE GWAEH +AD35..AD4F ; LVT # Lo [27] HANGUL SYLLABLE GOEG..HANGUL SYLLABLE GOEH +AD51..AD6B ; LVT # Lo [27] HANGUL SYLLABLE GYOG..HANGUL SYLLABLE GYOH +AD6D..AD87 ; LVT # Lo [27] HANGUL SYLLABLE GUG..HANGUL SYLLABLE GUH +AD89..ADA3 ; LVT # Lo [27] HANGUL 
SYLLABLE GWEOG..HANGUL SYLLABLE GWEOH +ADA5..ADBF ; LVT # Lo [27] HANGUL SYLLABLE GWEG..HANGUL SYLLABLE GWEH +ADC1..ADDB ; LVT # Lo [27] HANGUL SYLLABLE GWIG..HANGUL SYLLABLE GWIH +ADDD..ADF7 ; LVT # Lo [27] HANGUL SYLLABLE GYUG..HANGUL SYLLABLE GYUH +ADF9..AE13 ; LVT # Lo [27] HANGUL SYLLABLE GEUG..HANGUL SYLLABLE GEUH +AE15..AE2F ; LVT # Lo [27] HANGUL SYLLABLE GYIG..HANGUL SYLLABLE GYIH +AE31..AE4B ; LVT # Lo [27] HANGUL SYLLABLE GIG..HANGUL SYLLABLE GIH +AE4D..AE67 ; LVT # Lo [27] HANGUL SYLLABLE GGAG..HANGUL SYLLABLE GGAH +AE69..AE83 ; LVT # Lo [27] HANGUL SYLLABLE GGAEG..HANGUL SYLLABLE GGAEH +AE85..AE9F ; LVT # Lo [27] HANGUL SYLLABLE GGYAG..HANGUL SYLLABLE GGYAH +AEA1..AEBB ; LVT # Lo [27] HANGUL SYLLABLE GGYAEG..HANGUL SYLLABLE GGYAEH +AEBD..AED7 ; LVT # Lo [27] HANGUL SYLLABLE GGEOG..HANGUL SYLLABLE GGEOH +AED9..AEF3 ; LVT # Lo [27] HANGUL SYLLABLE GGEG..HANGUL SYLLABLE GGEH +AEF5..AF0F ; LVT # Lo [27] HANGUL SYLLABLE GGYEOG..HANGUL SYLLABLE GGYEOH +AF11..AF2B ; LVT # Lo [27] HANGUL SYLLABLE GGYEG..HANGUL SYLLABLE GGYEH +AF2D..AF47 ; LVT # Lo [27] HANGUL SYLLABLE GGOG..HANGUL SYLLABLE GGOH +AF49..AF63 ; LVT # Lo [27] HANGUL SYLLABLE GGWAG..HANGUL SYLLABLE GGWAH +AF65..AF7F ; LVT # Lo [27] HANGUL SYLLABLE GGWAEG..HANGUL SYLLABLE GGWAEH +AF81..AF9B ; LVT # Lo [27] HANGUL SYLLABLE GGOEG..HANGUL SYLLABLE GGOEH +AF9D..AFB7 ; LVT # Lo [27] HANGUL SYLLABLE GGYOG..HANGUL SYLLABLE GGYOH +AFB9..AFD3 ; LVT # Lo [27] HANGUL SYLLABLE GGUG..HANGUL SYLLABLE GGUH +AFD5..AFEF ; LVT # Lo [27] HANGUL SYLLABLE GGWEOG..HANGUL SYLLABLE GGWEOH +AFF1..B00B ; LVT # Lo [27] HANGUL SYLLABLE GGWEG..HANGUL SYLLABLE GGWEH +B00D..B027 ; LVT # Lo [27] HANGUL SYLLABLE GGWIG..HANGUL SYLLABLE GGWIH +B029..B043 ; LVT # Lo [27] HANGUL SYLLABLE GGYUG..HANGUL SYLLABLE GGYUH +B045..B05F ; LVT # Lo [27] HANGUL SYLLABLE GGEUG..HANGUL SYLLABLE GGEUH +B061..B07B ; LVT # Lo [27] HANGUL SYLLABLE GGYIG..HANGUL SYLLABLE GGYIH +B07D..B097 ; LVT # Lo [27] HANGUL SYLLABLE GGIG..HANGUL SYLLABLE GGIH 
+B099..B0B3 ; LVT # Lo [27] HANGUL SYLLABLE NAG..HANGUL SYLLABLE NAH +B0B5..B0CF ; LVT # Lo [27] HANGUL SYLLABLE NAEG..HANGUL SYLLABLE NAEH +B0D1..B0EB ; LVT # Lo [27] HANGUL SYLLABLE NYAG..HANGUL SYLLABLE NYAH +B0ED..B107 ; LVT # Lo [27] HANGUL SYLLABLE NYAEG..HANGUL SYLLABLE NYAEH +B109..B123 ; LVT # Lo [27] HANGUL SYLLABLE NEOG..HANGUL SYLLABLE NEOH +B125..B13F ; LVT # Lo [27] HANGUL SYLLABLE NEG..HANGUL SYLLABLE NEH +B141..B15B ; LVT # Lo [27] HANGUL SYLLABLE NYEOG..HANGUL SYLLABLE NYEOH +B15D..B177 ; LVT # Lo [27] HANGUL SYLLABLE NYEG..HANGUL SYLLABLE NYEH +B179..B193 ; LVT # Lo [27] HANGUL SYLLABLE NOG..HANGUL SYLLABLE NOH +B195..B1AF ; LVT # Lo [27] HANGUL SYLLABLE NWAG..HANGUL SYLLABLE NWAH +B1B1..B1CB ; LVT # Lo [27] HANGUL SYLLABLE NWAEG..HANGUL SYLLABLE NWAEH +B1CD..B1E7 ; LVT # Lo [27] HANGUL SYLLABLE NOEG..HANGUL SYLLABLE NOEH +B1E9..B203 ; LVT # Lo [27] HANGUL SYLLABLE NYOG..HANGUL SYLLABLE NYOH +B205..B21F ; LVT # Lo [27] HANGUL SYLLABLE NUG..HANGUL SYLLABLE NUH +B221..B23B ; LVT # Lo [27] HANGUL SYLLABLE NWEOG..HANGUL SYLLABLE NWEOH +B23D..B257 ; LVT # Lo [27] HANGUL SYLLABLE NWEG..HANGUL SYLLABLE NWEH +B259..B273 ; LVT # Lo [27] HANGUL SYLLABLE NWIG..HANGUL SYLLABLE NWIH +B275..B28F ; LVT # Lo [27] HANGUL SYLLABLE NYUG..HANGUL SYLLABLE NYUH +B291..B2AB ; LVT # Lo [27] HANGUL SYLLABLE NEUG..HANGUL SYLLABLE NEUH +B2AD..B2C7 ; LVT # Lo [27] HANGUL SYLLABLE NYIG..HANGUL SYLLABLE NYIH +B2C9..B2E3 ; LVT # Lo [27] HANGUL SYLLABLE NIG..HANGUL SYLLABLE NIH +B2E5..B2FF ; LVT # Lo [27] HANGUL SYLLABLE DAG..HANGUL SYLLABLE DAH +B301..B31B ; LVT # Lo [27] HANGUL SYLLABLE DAEG..HANGUL SYLLABLE DAEH +B31D..B337 ; LVT # Lo [27] HANGUL SYLLABLE DYAG..HANGUL SYLLABLE DYAH +B339..B353 ; LVT # Lo [27] HANGUL SYLLABLE DYAEG..HANGUL SYLLABLE DYAEH +B355..B36F ; LVT # Lo [27] HANGUL SYLLABLE DEOG..HANGUL SYLLABLE DEOH +B371..B38B ; LVT # Lo [27] HANGUL SYLLABLE DEG..HANGUL SYLLABLE DEH +B38D..B3A7 ; LVT # Lo [27] HANGUL SYLLABLE DYEOG..HANGUL SYLLABLE DYEOH +B3A9..B3C3 ; 
LVT # Lo [27] HANGUL SYLLABLE DYEG..HANGUL SYLLABLE DYEH +B3C5..B3DF ; LVT # Lo [27] HANGUL SYLLABLE DOG..HANGUL SYLLABLE DOH +B3E1..B3FB ; LVT # Lo [27] HANGUL SYLLABLE DWAG..HANGUL SYLLABLE DWAH +B3FD..B417 ; LVT # Lo [27] HANGUL SYLLABLE DWAEG..HANGUL SYLLABLE DWAEH +B419..B433 ; LVT # Lo [27] HANGUL SYLLABLE DOEG..HANGUL SYLLABLE DOEH +B435..B44F ; LVT # Lo [27] HANGUL SYLLABLE DYOG..HANGUL SYLLABLE DYOH +B451..B46B ; LVT # Lo [27] HANGUL SYLLABLE DUG..HANGUL SYLLABLE DUH +B46D..B487 ; LVT # Lo [27] HANGUL SYLLABLE DWEOG..HANGUL SYLLABLE DWEOH +B489..B4A3 ; LVT # Lo [27] HANGUL SYLLABLE DWEG..HANGUL SYLLABLE DWEH +B4A5..B4BF ; LVT # Lo [27] HANGUL SYLLABLE DWIG..HANGUL SYLLABLE DWIH +B4C1..B4DB ; LVT # Lo [27] HANGUL SYLLABLE DYUG..HANGUL SYLLABLE DYUH +B4DD..B4F7 ; LVT # Lo [27] HANGUL SYLLABLE DEUG..HANGUL SYLLABLE DEUH +B4F9..B513 ; LVT # Lo [27] HANGUL SYLLABLE DYIG..HANGUL SYLLABLE DYIH +B515..B52F ; LVT # Lo [27] HANGUL SYLLABLE DIG..HANGUL SYLLABLE DIH +B531..B54B ; LVT # Lo [27] HANGUL SYLLABLE DDAG..HANGUL SYLLABLE DDAH +B54D..B567 ; LVT # Lo [27] HANGUL SYLLABLE DDAEG..HANGUL SYLLABLE DDAEH +B569..B583 ; LVT # Lo [27] HANGUL SYLLABLE DDYAG..HANGUL SYLLABLE DDYAH +B585..B59F ; LVT # Lo [27] HANGUL SYLLABLE DDYAEG..HANGUL SYLLABLE DDYAEH +B5A1..B5BB ; LVT # Lo [27] HANGUL SYLLABLE DDEOG..HANGUL SYLLABLE DDEOH +B5BD..B5D7 ; LVT # Lo [27] HANGUL SYLLABLE DDEG..HANGUL SYLLABLE DDEH +B5D9..B5F3 ; LVT # Lo [27] HANGUL SYLLABLE DDYEOG..HANGUL SYLLABLE DDYEOH +B5F5..B60F ; LVT # Lo [27] HANGUL SYLLABLE DDYEG..HANGUL SYLLABLE DDYEH +B611..B62B ; LVT # Lo [27] HANGUL SYLLABLE DDOG..HANGUL SYLLABLE DDOH +B62D..B647 ; LVT # Lo [27] HANGUL SYLLABLE DDWAG..HANGUL SYLLABLE DDWAH +B649..B663 ; LVT # Lo [27] HANGUL SYLLABLE DDWAEG..HANGUL SYLLABLE DDWAEH +B665..B67F ; LVT # Lo [27] HANGUL SYLLABLE DDOEG..HANGUL SYLLABLE DDOEH +B681..B69B ; LVT # Lo [27] HANGUL SYLLABLE DDYOG..HANGUL SYLLABLE DDYOH +B69D..B6B7 ; LVT # Lo [27] HANGUL SYLLABLE DDUG..HANGUL SYLLABLE DDUH 
+B6B9..B6D3 ; LVT # Lo [27] HANGUL SYLLABLE DDWEOG..HANGUL SYLLABLE DDWEOH +B6D5..B6EF ; LVT # Lo [27] HANGUL SYLLABLE DDWEG..HANGUL SYLLABLE DDWEH +B6F1..B70B ; LVT # Lo [27] HANGUL SYLLABLE DDWIG..HANGUL SYLLABLE DDWIH +B70D..B727 ; LVT # Lo [27] HANGUL SYLLABLE DDYUG..HANGUL SYLLABLE DDYUH +B729..B743 ; LVT # Lo [27] HANGUL SYLLABLE DDEUG..HANGUL SYLLABLE DDEUH +B745..B75F ; LVT # Lo [27] HANGUL SYLLABLE DDYIG..HANGUL SYLLABLE DDYIH +B761..B77B ; LVT # Lo [27] HANGUL SYLLABLE DDIG..HANGUL SYLLABLE DDIH +B77D..B797 ; LVT # Lo [27] HANGUL SYLLABLE RAG..HANGUL SYLLABLE RAH +B799..B7B3 ; LVT # Lo [27] HANGUL SYLLABLE RAEG..HANGUL SYLLABLE RAEH +B7B5..B7CF ; LVT # Lo [27] HANGUL SYLLABLE RYAG..HANGUL SYLLABLE RYAH +B7D1..B7EB ; LVT # Lo [27] HANGUL SYLLABLE RYAEG..HANGUL SYLLABLE RYAEH +B7ED..B807 ; LVT # Lo [27] HANGUL SYLLABLE REOG..HANGUL SYLLABLE REOH +B809..B823 ; LVT # Lo [27] HANGUL SYLLABLE REG..HANGUL SYLLABLE REH +B825..B83F ; LVT # Lo [27] HANGUL SYLLABLE RYEOG..HANGUL SYLLABLE RYEOH +B841..B85B ; LVT # Lo [27] HANGUL SYLLABLE RYEG..HANGUL SYLLABLE RYEH +B85D..B877 ; LVT # Lo [27] HANGUL SYLLABLE ROG..HANGUL SYLLABLE ROH +B879..B893 ; LVT # Lo [27] HANGUL SYLLABLE RWAG..HANGUL SYLLABLE RWAH +B895..B8AF ; LVT # Lo [27] HANGUL SYLLABLE RWAEG..HANGUL SYLLABLE RWAEH +B8B1..B8CB ; LVT # Lo [27] HANGUL SYLLABLE ROEG..HANGUL SYLLABLE ROEH +B8CD..B8E7 ; LVT # Lo [27] HANGUL SYLLABLE RYOG..HANGUL SYLLABLE RYOH +B8E9..B903 ; LVT # Lo [27] HANGUL SYLLABLE RUG..HANGUL SYLLABLE RUH +B905..B91F ; LVT # Lo [27] HANGUL SYLLABLE RWEOG..HANGUL SYLLABLE RWEOH +B921..B93B ; LVT # Lo [27] HANGUL SYLLABLE RWEG..HANGUL SYLLABLE RWEH +B93D..B957 ; LVT # Lo [27] HANGUL SYLLABLE RWIG..HANGUL SYLLABLE RWIH +B959..B973 ; LVT # Lo [27] HANGUL SYLLABLE RYUG..HANGUL SYLLABLE RYUH +B975..B98F ; LVT # Lo [27] HANGUL SYLLABLE REUG..HANGUL SYLLABLE REUH +B991..B9AB ; LVT # Lo [27] HANGUL SYLLABLE RYIG..HANGUL SYLLABLE RYIH +B9AD..B9C7 ; LVT # Lo [27] HANGUL SYLLABLE RIG..HANGUL SYLLABLE RIH 
+B9C9..B9E3 ; LVT # Lo [27] HANGUL SYLLABLE MAG..HANGUL SYLLABLE MAH +B9E5..B9FF ; LVT # Lo [27] HANGUL SYLLABLE MAEG..HANGUL SYLLABLE MAEH +BA01..BA1B ; LVT # Lo [27] HANGUL SYLLABLE MYAG..HANGUL SYLLABLE MYAH +BA1D..BA37 ; LVT # Lo [27] HANGUL SYLLABLE MYAEG..HANGUL SYLLABLE MYAEH +BA39..BA53 ; LVT # Lo [27] HANGUL SYLLABLE MEOG..HANGUL SYLLABLE MEOH +BA55..BA6F ; LVT # Lo [27] HANGUL SYLLABLE MEG..HANGUL SYLLABLE MEH +BA71..BA8B ; LVT # Lo [27] HANGUL SYLLABLE MYEOG..HANGUL SYLLABLE MYEOH +BA8D..BAA7 ; LVT # Lo [27] HANGUL SYLLABLE MYEG..HANGUL SYLLABLE MYEH +BAA9..BAC3 ; LVT # Lo [27] HANGUL SYLLABLE MOG..HANGUL SYLLABLE MOH +BAC5..BADF ; LVT # Lo [27] HANGUL SYLLABLE MWAG..HANGUL SYLLABLE MWAH +BAE1..BAFB ; LVT # Lo [27] HANGUL SYLLABLE MWAEG..HANGUL SYLLABLE MWAEH +BAFD..BB17 ; LVT # Lo [27] HANGUL SYLLABLE MOEG..HANGUL SYLLABLE MOEH +BB19..BB33 ; LVT # Lo [27] HANGUL SYLLABLE MYOG..HANGUL SYLLABLE MYOH +BB35..BB4F ; LVT # Lo [27] HANGUL SYLLABLE MUG..HANGUL SYLLABLE MUH +BB51..BB6B ; LVT # Lo [27] HANGUL SYLLABLE MWEOG..HANGUL SYLLABLE MWEOH +BB6D..BB87 ; LVT # Lo [27] HANGUL SYLLABLE MWEG..HANGUL SYLLABLE MWEH +BB89..BBA3 ; LVT # Lo [27] HANGUL SYLLABLE MWIG..HANGUL SYLLABLE MWIH +BBA5..BBBF ; LVT # Lo [27] HANGUL SYLLABLE MYUG..HANGUL SYLLABLE MYUH +BBC1..BBDB ; LVT # Lo [27] HANGUL SYLLABLE MEUG..HANGUL SYLLABLE MEUH +BBDD..BBF7 ; LVT # Lo [27] HANGUL SYLLABLE MYIG..HANGUL SYLLABLE MYIH +BBF9..BC13 ; LVT # Lo [27] HANGUL SYLLABLE MIG..HANGUL SYLLABLE MIH +BC15..BC2F ; LVT # Lo [27] HANGUL SYLLABLE BAG..HANGUL SYLLABLE BAH +BC31..BC4B ; LVT # Lo [27] HANGUL SYLLABLE BAEG..HANGUL SYLLABLE BAEH +BC4D..BC67 ; LVT # Lo [27] HANGUL SYLLABLE BYAG..HANGUL SYLLABLE BYAH +BC69..BC83 ; LVT # Lo [27] HANGUL SYLLABLE BYAEG..HANGUL SYLLABLE BYAEH +BC85..BC9F ; LVT # Lo [27] HANGUL SYLLABLE BEOG..HANGUL SYLLABLE BEOH +BCA1..BCBB ; LVT # Lo [27] HANGUL SYLLABLE BEG..HANGUL SYLLABLE BEH +BCBD..BCD7 ; LVT # Lo [27] HANGUL SYLLABLE BYEOG..HANGUL SYLLABLE BYEOH +BCD9..BCF3 ; 
LVT # Lo [27] HANGUL SYLLABLE BYEG..HANGUL SYLLABLE BYEH +BCF5..BD0F ; LVT # Lo [27] HANGUL SYLLABLE BOG..HANGUL SYLLABLE BOH +BD11..BD2B ; LVT # Lo [27] HANGUL SYLLABLE BWAG..HANGUL SYLLABLE BWAH +BD2D..BD47 ; LVT # Lo [27] HANGUL SYLLABLE BWAEG..HANGUL SYLLABLE BWAEH +BD49..BD63 ; LVT # Lo [27] HANGUL SYLLABLE BOEG..HANGUL SYLLABLE BOEH +BD65..BD7F ; LVT # Lo [27] HANGUL SYLLABLE BYOG..HANGUL SYLLABLE BYOH +BD81..BD9B ; LVT # Lo [27] HANGUL SYLLABLE BUG..HANGUL SYLLABLE BUH +BD9D..BDB7 ; LVT # Lo [27] HANGUL SYLLABLE BWEOG..HANGUL SYLLABLE BWEOH +BDB9..BDD3 ; LVT # Lo [27] HANGUL SYLLABLE BWEG..HANGUL SYLLABLE BWEH +BDD5..BDEF ; LVT # Lo [27] HANGUL SYLLABLE BWIG..HANGUL SYLLABLE BWIH +BDF1..BE0B ; LVT # Lo [27] HANGUL SYLLABLE BYUG..HANGUL SYLLABLE BYUH +BE0D..BE27 ; LVT # Lo [27] HANGUL SYLLABLE BEUG..HANGUL SYLLABLE BEUH +BE29..BE43 ; LVT # Lo [27] HANGUL SYLLABLE BYIG..HANGUL SYLLABLE BYIH +BE45..BE5F ; LVT # Lo [27] HANGUL SYLLABLE BIG..HANGUL SYLLABLE BIH +BE61..BE7B ; LVT # Lo [27] HANGUL SYLLABLE BBAG..HANGUL SYLLABLE BBAH +BE7D..BE97 ; LVT # Lo [27] HANGUL SYLLABLE BBAEG..HANGUL SYLLABLE BBAEH +BE99..BEB3 ; LVT # Lo [27] HANGUL SYLLABLE BBYAG..HANGUL SYLLABLE BBYAH +BEB5..BECF ; LVT # Lo [27] HANGUL SYLLABLE BBYAEG..HANGUL SYLLABLE BBYAEH +BED1..BEEB ; LVT # Lo [27] HANGUL SYLLABLE BBEOG..HANGUL SYLLABLE BBEOH +BEED..BF07 ; LVT # Lo [27] HANGUL SYLLABLE BBEG..HANGUL SYLLABLE BBEH +BF09..BF23 ; LVT # Lo [27] HANGUL SYLLABLE BBYEOG..HANGUL SYLLABLE BBYEOH +BF25..BF3F ; LVT # Lo [27] HANGUL SYLLABLE BBYEG..HANGUL SYLLABLE BBYEH +BF41..BF5B ; LVT # Lo [27] HANGUL SYLLABLE BBOG..HANGUL SYLLABLE BBOH +BF5D..BF77 ; LVT # Lo [27] HANGUL SYLLABLE BBWAG..HANGUL SYLLABLE BBWAH +BF79..BF93 ; LVT # Lo [27] HANGUL SYLLABLE BBWAEG..HANGUL SYLLABLE BBWAEH +BF95..BFAF ; LVT # Lo [27] HANGUL SYLLABLE BBOEG..HANGUL SYLLABLE BBOEH +BFB1..BFCB ; LVT # Lo [27] HANGUL SYLLABLE BBYOG..HANGUL SYLLABLE BBYOH +BFCD..BFE7 ; LVT # Lo [27] HANGUL SYLLABLE BBUG..HANGUL SYLLABLE BBUH 
+BFE9..C003 ; LVT # Lo [27] HANGUL SYLLABLE BBWEOG..HANGUL SYLLABLE BBWEOH +C005..C01F ; LVT # Lo [27] HANGUL SYLLABLE BBWEG..HANGUL SYLLABLE BBWEH +C021..C03B ; LVT # Lo [27] HANGUL SYLLABLE BBWIG..HANGUL SYLLABLE BBWIH +C03D..C057 ; LVT # Lo [27] HANGUL SYLLABLE BBYUG..HANGUL SYLLABLE BBYUH +C059..C073 ; LVT # Lo [27] HANGUL SYLLABLE BBEUG..HANGUL SYLLABLE BBEUH +C075..C08F ; LVT # Lo [27] HANGUL SYLLABLE BBYIG..HANGUL SYLLABLE BBYIH +C091..C0AB ; LVT # Lo [27] HANGUL SYLLABLE BBIG..HANGUL SYLLABLE BBIH +C0AD..C0C7 ; LVT # Lo [27] HANGUL SYLLABLE SAG..HANGUL SYLLABLE SAH +C0C9..C0E3 ; LVT # Lo [27] HANGUL SYLLABLE SAEG..HANGUL SYLLABLE SAEH +C0E5..C0FF ; LVT # Lo [27] HANGUL SYLLABLE SYAG..HANGUL SYLLABLE SYAH +C101..C11B ; LVT # Lo [27] HANGUL SYLLABLE SYAEG..HANGUL SYLLABLE SYAEH +C11D..C137 ; LVT # Lo [27] HANGUL SYLLABLE SEOG..HANGUL SYLLABLE SEOH +C139..C153 ; LVT # Lo [27] HANGUL SYLLABLE SEG..HANGUL SYLLABLE SEH +C155..C16F ; LVT # Lo [27] HANGUL SYLLABLE SYEOG..HANGUL SYLLABLE SYEOH +C171..C18B ; LVT # Lo [27] HANGUL SYLLABLE SYEG..HANGUL SYLLABLE SYEH +C18D..C1A7 ; LVT # Lo [27] HANGUL SYLLABLE SOG..HANGUL SYLLABLE SOH +C1A9..C1C3 ; LVT # Lo [27] HANGUL SYLLABLE SWAG..HANGUL SYLLABLE SWAH +C1C5..C1DF ; LVT # Lo [27] HANGUL SYLLABLE SWAEG..HANGUL SYLLABLE SWAEH +C1E1..C1FB ; LVT # Lo [27] HANGUL SYLLABLE SOEG..HANGUL SYLLABLE SOEH +C1FD..C217 ; LVT # Lo [27] HANGUL SYLLABLE SYOG..HANGUL SYLLABLE SYOH +C219..C233 ; LVT # Lo [27] HANGUL SYLLABLE SUG..HANGUL SYLLABLE SUH +C235..C24F ; LVT # Lo [27] HANGUL SYLLABLE SWEOG..HANGUL SYLLABLE SWEOH +C251..C26B ; LVT # Lo [27] HANGUL SYLLABLE SWEG..HANGUL SYLLABLE SWEH +C26D..C287 ; LVT # Lo [27] HANGUL SYLLABLE SWIG..HANGUL SYLLABLE SWIH +C289..C2A3 ; LVT # Lo [27] HANGUL SYLLABLE SYUG..HANGUL SYLLABLE SYUH +C2A5..C2BF ; LVT # Lo [27] HANGUL SYLLABLE SEUG..HANGUL SYLLABLE SEUH +C2C1..C2DB ; LVT # Lo [27] HANGUL SYLLABLE SYIG..HANGUL SYLLABLE SYIH +C2DD..C2F7 ; LVT # Lo [27] HANGUL SYLLABLE SIG..HANGUL SYLLABLE SIH 
+C2F9..C313 ; LVT # Lo [27] HANGUL SYLLABLE SSAG..HANGUL SYLLABLE SSAH +C315..C32F ; LVT # Lo [27] HANGUL SYLLABLE SSAEG..HANGUL SYLLABLE SSAEH +C331..C34B ; LVT # Lo [27] HANGUL SYLLABLE SSYAG..HANGUL SYLLABLE SSYAH +C34D..C367 ; LVT # Lo [27] HANGUL SYLLABLE SSYAEG..HANGUL SYLLABLE SSYAEH +C369..C383 ; LVT # Lo [27] HANGUL SYLLABLE SSEOG..HANGUL SYLLABLE SSEOH +C385..C39F ; LVT # Lo [27] HANGUL SYLLABLE SSEG..HANGUL SYLLABLE SSEH +C3A1..C3BB ; LVT # Lo [27] HANGUL SYLLABLE SSYEOG..HANGUL SYLLABLE SSYEOH +C3BD..C3D7 ; LVT # Lo [27] HANGUL SYLLABLE SSYEG..HANGUL SYLLABLE SSYEH +C3D9..C3F3 ; LVT # Lo [27] HANGUL SYLLABLE SSOG..HANGUL SYLLABLE SSOH +C3F5..C40F ; LVT # Lo [27] HANGUL SYLLABLE SSWAG..HANGUL SYLLABLE SSWAH +C411..C42B ; LVT # Lo [27] HANGUL SYLLABLE SSWAEG..HANGUL SYLLABLE SSWAEH +C42D..C447 ; LVT # Lo [27] HANGUL SYLLABLE SSOEG..HANGUL SYLLABLE SSOEH +C449..C463 ; LVT # Lo [27] HANGUL SYLLABLE SSYOG..HANGUL SYLLABLE SSYOH +C465..C47F ; LVT # Lo [27] HANGUL SYLLABLE SSUG..HANGUL SYLLABLE SSUH +C481..C49B ; LVT # Lo [27] HANGUL SYLLABLE SSWEOG..HANGUL SYLLABLE SSWEOH +C49D..C4B7 ; LVT # Lo [27] HANGUL SYLLABLE SSWEG..HANGUL SYLLABLE SSWEH +C4B9..C4D3 ; LVT # Lo [27] HANGUL SYLLABLE SSWIG..HANGUL SYLLABLE SSWIH +C4D5..C4EF ; LVT # Lo [27] HANGUL SYLLABLE SSYUG..HANGUL SYLLABLE SSYUH +C4F1..C50B ; LVT # Lo [27] HANGUL SYLLABLE SSEUG..HANGUL SYLLABLE SSEUH +C50D..C527 ; LVT # Lo [27] HANGUL SYLLABLE SSYIG..HANGUL SYLLABLE SSYIH +C529..C543 ; LVT # Lo [27] HANGUL SYLLABLE SSIG..HANGUL SYLLABLE SSIH +C545..C55F ; LVT # Lo [27] HANGUL SYLLABLE AG..HANGUL SYLLABLE AH +C561..C57B ; LVT # Lo [27] HANGUL SYLLABLE AEG..HANGUL SYLLABLE AEH +C57D..C597 ; LVT # Lo [27] HANGUL SYLLABLE YAG..HANGUL SYLLABLE YAH +C599..C5B3 ; LVT # Lo [27] HANGUL SYLLABLE YAEG..HANGUL SYLLABLE YAEH +C5B5..C5CF ; LVT # Lo [27] HANGUL SYLLABLE EOG..HANGUL SYLLABLE EOH +C5D1..C5EB ; LVT # Lo [27] HANGUL SYLLABLE EG..HANGUL SYLLABLE EH +C5ED..C607 ; LVT # Lo [27] HANGUL SYLLABLE YEOG..HANGUL 
SYLLABLE YEOH +C609..C623 ; LVT # Lo [27] HANGUL SYLLABLE YEG..HANGUL SYLLABLE YEH +C625..C63F ; LVT # Lo [27] HANGUL SYLLABLE OG..HANGUL SYLLABLE OH +C641..C65B ; LVT # Lo [27] HANGUL SYLLABLE WAG..HANGUL SYLLABLE WAH +C65D..C677 ; LVT # Lo [27] HANGUL SYLLABLE WAEG..HANGUL SYLLABLE WAEH +C679..C693 ; LVT # Lo [27] HANGUL SYLLABLE OEG..HANGUL SYLLABLE OEH +C695..C6AF ; LVT # Lo [27] HANGUL SYLLABLE YOG..HANGUL SYLLABLE YOH +C6B1..C6CB ; LVT # Lo [27] HANGUL SYLLABLE UG..HANGUL SYLLABLE UH +C6CD..C6E7 ; LVT # Lo [27] HANGUL SYLLABLE WEOG..HANGUL SYLLABLE WEOH +C6E9..C703 ; LVT # Lo [27] HANGUL SYLLABLE WEG..HANGUL SYLLABLE WEH +C705..C71F ; LVT # Lo [27] HANGUL SYLLABLE WIG..HANGUL SYLLABLE WIH +C721..C73B ; LVT # Lo [27] HANGUL SYLLABLE YUG..HANGUL SYLLABLE YUH +C73D..C757 ; LVT # Lo [27] HANGUL SYLLABLE EUG..HANGUL SYLLABLE EUH +C759..C773 ; LVT # Lo [27] HANGUL SYLLABLE YIG..HANGUL SYLLABLE YIH +C775..C78F ; LVT # Lo [27] HANGUL SYLLABLE IG..HANGUL SYLLABLE IH +C791..C7AB ; LVT # Lo [27] HANGUL SYLLABLE JAG..HANGUL SYLLABLE JAH +C7AD..C7C7 ; LVT # Lo [27] HANGUL SYLLABLE JAEG..HANGUL SYLLABLE JAEH +C7C9..C7E3 ; LVT # Lo [27] HANGUL SYLLABLE JYAG..HANGUL SYLLABLE JYAH +C7E5..C7FF ; LVT # Lo [27] HANGUL SYLLABLE JYAEG..HANGUL SYLLABLE JYAEH +C801..C81B ; LVT # Lo [27] HANGUL SYLLABLE JEOG..HANGUL SYLLABLE JEOH +C81D..C837 ; LVT # Lo [27] HANGUL SYLLABLE JEG..HANGUL SYLLABLE JEH +C839..C853 ; LVT # Lo [27] HANGUL SYLLABLE JYEOG..HANGUL SYLLABLE JYEOH +C855..C86F ; LVT # Lo [27] HANGUL SYLLABLE JYEG..HANGUL SYLLABLE JYEH +C871..C88B ; LVT # Lo [27] HANGUL SYLLABLE JOG..HANGUL SYLLABLE JOH +C88D..C8A7 ; LVT # Lo [27] HANGUL SYLLABLE JWAG..HANGUL SYLLABLE JWAH +C8A9..C8C3 ; LVT # Lo [27] HANGUL SYLLABLE JWAEG..HANGUL SYLLABLE JWAEH +C8C5..C8DF ; LVT # Lo [27] HANGUL SYLLABLE JOEG..HANGUL SYLLABLE JOEH +C8E1..C8FB ; LVT # Lo [27] HANGUL SYLLABLE JYOG..HANGUL SYLLABLE JYOH +C8FD..C917 ; LVT # Lo [27] HANGUL SYLLABLE JUG..HANGUL SYLLABLE JUH +C919..C933 ; LVT # Lo [27] 
HANGUL SYLLABLE JWEOG..HANGUL SYLLABLE JWEOH +C935..C94F ; LVT # Lo [27] HANGUL SYLLABLE JWEG..HANGUL SYLLABLE JWEH +C951..C96B ; LVT # Lo [27] HANGUL SYLLABLE JWIG..HANGUL SYLLABLE JWIH +C96D..C987 ; LVT # Lo [27] HANGUL SYLLABLE JYUG..HANGUL SYLLABLE JYUH +C989..C9A3 ; LVT # Lo [27] HANGUL SYLLABLE JEUG..HANGUL SYLLABLE JEUH +C9A5..C9BF ; LVT # Lo [27] HANGUL SYLLABLE JYIG..HANGUL SYLLABLE JYIH +C9C1..C9DB ; LVT # Lo [27] HANGUL SYLLABLE JIG..HANGUL SYLLABLE JIH +C9DD..C9F7 ; LVT # Lo [27] HANGUL SYLLABLE JJAG..HANGUL SYLLABLE JJAH +C9F9..CA13 ; LVT # Lo [27] HANGUL SYLLABLE JJAEG..HANGUL SYLLABLE JJAEH +CA15..CA2F ; LVT # Lo [27] HANGUL SYLLABLE JJYAG..HANGUL SYLLABLE JJYAH +CA31..CA4B ; LVT # Lo [27] HANGUL SYLLABLE JJYAEG..HANGUL SYLLABLE JJYAEH +CA4D..CA67 ; LVT # Lo [27] HANGUL SYLLABLE JJEOG..HANGUL SYLLABLE JJEOH +CA69..CA83 ; LVT # Lo [27] HANGUL SYLLABLE JJEG..HANGUL SYLLABLE JJEH +CA85..CA9F ; LVT # Lo [27] HANGUL SYLLABLE JJYEOG..HANGUL SYLLABLE JJYEOH +CAA1..CABB ; LVT # Lo [27] HANGUL SYLLABLE JJYEG..HANGUL SYLLABLE JJYEH +CABD..CAD7 ; LVT # Lo [27] HANGUL SYLLABLE JJOG..HANGUL SYLLABLE JJOH +CAD9..CAF3 ; LVT # Lo [27] HANGUL SYLLABLE JJWAG..HANGUL SYLLABLE JJWAH +CAF5..CB0F ; LVT # Lo [27] HANGUL SYLLABLE JJWAEG..HANGUL SYLLABLE JJWAEH +CB11..CB2B ; LVT # Lo [27] HANGUL SYLLABLE JJOEG..HANGUL SYLLABLE JJOEH +CB2D..CB47 ; LVT # Lo [27] HANGUL SYLLABLE JJYOG..HANGUL SYLLABLE JJYOH +CB49..CB63 ; LVT # Lo [27] HANGUL SYLLABLE JJUG..HANGUL SYLLABLE JJUH +CB65..CB7F ; LVT # Lo [27] HANGUL SYLLABLE JJWEOG..HANGUL SYLLABLE JJWEOH +CB81..CB9B ; LVT # Lo [27] HANGUL SYLLABLE JJWEG..HANGUL SYLLABLE JJWEH +CB9D..CBB7 ; LVT # Lo [27] HANGUL SYLLABLE JJWIG..HANGUL SYLLABLE JJWIH +CBB9..CBD3 ; LVT # Lo [27] HANGUL SYLLABLE JJYUG..HANGUL SYLLABLE JJYUH +CBD5..CBEF ; LVT # Lo [27] HANGUL SYLLABLE JJEUG..HANGUL SYLLABLE JJEUH +CBF1..CC0B ; LVT # Lo [27] HANGUL SYLLABLE JJYIG..HANGUL SYLLABLE JJYIH +CC0D..CC27 ; LVT # Lo [27] HANGUL SYLLABLE JJIG..HANGUL SYLLABLE JJIH 
+CC29..CC43 ; LVT # Lo [27] HANGUL SYLLABLE CAG..HANGUL SYLLABLE CAH +CC45..CC5F ; LVT # Lo [27] HANGUL SYLLABLE CAEG..HANGUL SYLLABLE CAEH +CC61..CC7B ; LVT # Lo [27] HANGUL SYLLABLE CYAG..HANGUL SYLLABLE CYAH +CC7D..CC97 ; LVT # Lo [27] HANGUL SYLLABLE CYAEG..HANGUL SYLLABLE CYAEH +CC99..CCB3 ; LVT # Lo [27] HANGUL SYLLABLE CEOG..HANGUL SYLLABLE CEOH +CCB5..CCCF ; LVT # Lo [27] HANGUL SYLLABLE CEG..HANGUL SYLLABLE CEH +CCD1..CCEB ; LVT # Lo [27] HANGUL SYLLABLE CYEOG..HANGUL SYLLABLE CYEOH +CCED..CD07 ; LVT # Lo [27] HANGUL SYLLABLE CYEG..HANGUL SYLLABLE CYEH +CD09..CD23 ; LVT # Lo [27] HANGUL SYLLABLE COG..HANGUL SYLLABLE COH +CD25..CD3F ; LVT # Lo [27] HANGUL SYLLABLE CWAG..HANGUL SYLLABLE CWAH +CD41..CD5B ; LVT # Lo [27] HANGUL SYLLABLE CWAEG..HANGUL SYLLABLE CWAEH +CD5D..CD77 ; LVT # Lo [27] HANGUL SYLLABLE COEG..HANGUL SYLLABLE COEH +CD79..CD93 ; LVT # Lo [27] HANGUL SYLLABLE CYOG..HANGUL SYLLABLE CYOH +CD95..CDAF ; LVT # Lo [27] HANGUL SYLLABLE CUG..HANGUL SYLLABLE CUH +CDB1..CDCB ; LVT # Lo [27] HANGUL SYLLABLE CWEOG..HANGUL SYLLABLE CWEOH +CDCD..CDE7 ; LVT # Lo [27] HANGUL SYLLABLE CWEG..HANGUL SYLLABLE CWEH +CDE9..CE03 ; LVT # Lo [27] HANGUL SYLLABLE CWIG..HANGUL SYLLABLE CWIH +CE05..CE1F ; LVT # Lo [27] HANGUL SYLLABLE CYUG..HANGUL SYLLABLE CYUH +CE21..CE3B ; LVT # Lo [27] HANGUL SYLLABLE CEUG..HANGUL SYLLABLE CEUH +CE3D..CE57 ; LVT # Lo [27] HANGUL SYLLABLE CYIG..HANGUL SYLLABLE CYIH +CE59..CE73 ; LVT # Lo [27] HANGUL SYLLABLE CIG..HANGUL SYLLABLE CIH +CE75..CE8F ; LVT # Lo [27] HANGUL SYLLABLE KAG..HANGUL SYLLABLE KAH +CE91..CEAB ; LVT # Lo [27] HANGUL SYLLABLE KAEG..HANGUL SYLLABLE KAEH +CEAD..CEC7 ; LVT # Lo [27] HANGUL SYLLABLE KYAG..HANGUL SYLLABLE KYAH +CEC9..CEE3 ; LVT # Lo [27] HANGUL SYLLABLE KYAEG..HANGUL SYLLABLE KYAEH +CEE5..CEFF ; LVT # Lo [27] HANGUL SYLLABLE KEOG..HANGUL SYLLABLE KEOH +CF01..CF1B ; LVT # Lo [27] HANGUL SYLLABLE KEG..HANGUL SYLLABLE KEH +CF1D..CF37 ; LVT # Lo [27] HANGUL SYLLABLE KYEOG..HANGUL SYLLABLE KYEOH +CF39..CF53 ; 
LVT # Lo [27] HANGUL SYLLABLE KYEG..HANGUL SYLLABLE KYEH +CF55..CF6F ; LVT # Lo [27] HANGUL SYLLABLE KOG..HANGUL SYLLABLE KOH +CF71..CF8B ; LVT # Lo [27] HANGUL SYLLABLE KWAG..HANGUL SYLLABLE KWAH +CF8D..CFA7 ; LVT # Lo [27] HANGUL SYLLABLE KWAEG..HANGUL SYLLABLE KWAEH +CFA9..CFC3 ; LVT # Lo [27] HANGUL SYLLABLE KOEG..HANGUL SYLLABLE KOEH +CFC5..CFDF ; LVT # Lo [27] HANGUL SYLLABLE KYOG..HANGUL SYLLABLE KYOH +CFE1..CFFB ; LVT # Lo [27] HANGUL SYLLABLE KUG..HANGUL SYLLABLE KUH +CFFD..D017 ; LVT # Lo [27] HANGUL SYLLABLE KWEOG..HANGUL SYLLABLE KWEOH +D019..D033 ; LVT # Lo [27] HANGUL SYLLABLE KWEG..HANGUL SYLLABLE KWEH +D035..D04F ; LVT # Lo [27] HANGUL SYLLABLE KWIG..HANGUL SYLLABLE KWIH +D051..D06B ; LVT # Lo [27] HANGUL SYLLABLE KYUG..HANGUL SYLLABLE KYUH +D06D..D087 ; LVT # Lo [27] HANGUL SYLLABLE KEUG..HANGUL SYLLABLE KEUH +D089..D0A3 ; LVT # Lo [27] HANGUL SYLLABLE KYIG..HANGUL SYLLABLE KYIH +D0A5..D0BF ; LVT # Lo [27] HANGUL SYLLABLE KIG..HANGUL SYLLABLE KIH +D0C1..D0DB ; LVT # Lo [27] HANGUL SYLLABLE TAG..HANGUL SYLLABLE TAH +D0DD..D0F7 ; LVT # Lo [27] HANGUL SYLLABLE TAEG..HANGUL SYLLABLE TAEH +D0F9..D113 ; LVT # Lo [27] HANGUL SYLLABLE TYAG..HANGUL SYLLABLE TYAH +D115..D12F ; LVT # Lo [27] HANGUL SYLLABLE TYAEG..HANGUL SYLLABLE TYAEH +D131..D14B ; LVT # Lo [27] HANGUL SYLLABLE TEOG..HANGUL SYLLABLE TEOH +D14D..D167 ; LVT # Lo [27] HANGUL SYLLABLE TEG..HANGUL SYLLABLE TEH +D169..D183 ; LVT # Lo [27] HANGUL SYLLABLE TYEOG..HANGUL SYLLABLE TYEOH +D185..D19F ; LVT # Lo [27] HANGUL SYLLABLE TYEG..HANGUL SYLLABLE TYEH +D1A1..D1BB ; LVT # Lo [27] HANGUL SYLLABLE TOG..HANGUL SYLLABLE TOH +D1BD..D1D7 ; LVT # Lo [27] HANGUL SYLLABLE TWAG..HANGUL SYLLABLE TWAH +D1D9..D1F3 ; LVT # Lo [27] HANGUL SYLLABLE TWAEG..HANGUL SYLLABLE TWAEH +D1F5..D20F ; LVT # Lo [27] HANGUL SYLLABLE TOEG..HANGUL SYLLABLE TOEH +D211..D22B ; LVT # Lo [27] HANGUL SYLLABLE TYOG..HANGUL SYLLABLE TYOH +D22D..D247 ; LVT # Lo [27] HANGUL SYLLABLE TUG..HANGUL SYLLABLE TUH +D249..D263 ; LVT # Lo [27] 
HANGUL SYLLABLE TWEOG..HANGUL SYLLABLE TWEOH +D265..D27F ; LVT # Lo [27] HANGUL SYLLABLE TWEG..HANGUL SYLLABLE TWEH +D281..D29B ; LVT # Lo [27] HANGUL SYLLABLE TWIG..HANGUL SYLLABLE TWIH +D29D..D2B7 ; LVT # Lo [27] HANGUL SYLLABLE TYUG..HANGUL SYLLABLE TYUH +D2B9..D2D3 ; LVT # Lo [27] HANGUL SYLLABLE TEUG..HANGUL SYLLABLE TEUH +D2D5..D2EF ; LVT # Lo [27] HANGUL SYLLABLE TYIG..HANGUL SYLLABLE TYIH +D2F1..D30B ; LVT # Lo [27] HANGUL SYLLABLE TIG..HANGUL SYLLABLE TIH +D30D..D327 ; LVT # Lo [27] HANGUL SYLLABLE PAG..HANGUL SYLLABLE PAH +D329..D343 ; LVT # Lo [27] HANGUL SYLLABLE PAEG..HANGUL SYLLABLE PAEH +D345..D35F ; LVT # Lo [27] HANGUL SYLLABLE PYAG..HANGUL SYLLABLE PYAH +D361..D37B ; LVT # Lo [27] HANGUL SYLLABLE PYAEG..HANGUL SYLLABLE PYAEH +D37D..D397 ; LVT # Lo [27] HANGUL SYLLABLE PEOG..HANGUL SYLLABLE PEOH +D399..D3B3 ; LVT # Lo [27] HANGUL SYLLABLE PEG..HANGUL SYLLABLE PEH +D3B5..D3CF ; LVT # Lo [27] HANGUL SYLLABLE PYEOG..HANGUL SYLLABLE PYEOH +D3D1..D3EB ; LVT # Lo [27] HANGUL SYLLABLE PYEG..HANGUL SYLLABLE PYEH +D3ED..D407 ; LVT # Lo [27] HANGUL SYLLABLE POG..HANGUL SYLLABLE POH +D409..D423 ; LVT # Lo [27] HANGUL SYLLABLE PWAG..HANGUL SYLLABLE PWAH +D425..D43F ; LVT # Lo [27] HANGUL SYLLABLE PWAEG..HANGUL SYLLABLE PWAEH +D441..D45B ; LVT # Lo [27] HANGUL SYLLABLE POEG..HANGUL SYLLABLE POEH +D45D..D477 ; LVT # Lo [27] HANGUL SYLLABLE PYOG..HANGUL SYLLABLE PYOH +D479..D493 ; LVT # Lo [27] HANGUL SYLLABLE PUG..HANGUL SYLLABLE PUH +D495..D4AF ; LVT # Lo [27] HANGUL SYLLABLE PWEOG..HANGUL SYLLABLE PWEOH +D4B1..D4CB ; LVT # Lo [27] HANGUL SYLLABLE PWEG..HANGUL SYLLABLE PWEH +D4CD..D4E7 ; LVT # Lo [27] HANGUL SYLLABLE PWIG..HANGUL SYLLABLE PWIH +D4E9..D503 ; LVT # Lo [27] HANGUL SYLLABLE PYUG..HANGUL SYLLABLE PYUH +D505..D51F ; LVT # Lo [27] HANGUL SYLLABLE PEUG..HANGUL SYLLABLE PEUH +D521..D53B ; LVT # Lo [27] HANGUL SYLLABLE PYIG..HANGUL SYLLABLE PYIH +D53D..D557 ; LVT # Lo [27] HANGUL SYLLABLE PIG..HANGUL SYLLABLE PIH +D559..D573 ; LVT # Lo [27] HANGUL 
SYLLABLE HAG..HANGUL SYLLABLE HAH +D575..D58F ; LVT # Lo [27] HANGUL SYLLABLE HAEG..HANGUL SYLLABLE HAEH +D591..D5AB ; LVT # Lo [27] HANGUL SYLLABLE HYAG..HANGUL SYLLABLE HYAH +D5AD..D5C7 ; LVT # Lo [27] HANGUL SYLLABLE HYAEG..HANGUL SYLLABLE HYAEH +D5C9..D5E3 ; LVT # Lo [27] HANGUL SYLLABLE HEOG..HANGUL SYLLABLE HEOH +D5E5..D5FF ; LVT # Lo [27] HANGUL SYLLABLE HEG..HANGUL SYLLABLE HEH +D601..D61B ; LVT # Lo [27] HANGUL SYLLABLE HYEOG..HANGUL SYLLABLE HYEOH +D61D..D637 ; LVT # Lo [27] HANGUL SYLLABLE HYEG..HANGUL SYLLABLE HYEH +D639..D653 ; LVT # Lo [27] HANGUL SYLLABLE HOG..HANGUL SYLLABLE HOH +D655..D66F ; LVT # Lo [27] HANGUL SYLLABLE HWAG..HANGUL SYLLABLE HWAH +D671..D68B ; LVT # Lo [27] HANGUL SYLLABLE HWAEG..HANGUL SYLLABLE HWAEH +D68D..D6A7 ; LVT # Lo [27] HANGUL SYLLABLE HOEG..HANGUL SYLLABLE HOEH +D6A9..D6C3 ; LVT # Lo [27] HANGUL SYLLABLE HYOG..HANGUL SYLLABLE HYOH +D6C5..D6DF ; LVT # Lo [27] HANGUL SYLLABLE HUG..HANGUL SYLLABLE HUH +D6E1..D6FB ; LVT # Lo [27] HANGUL SYLLABLE HWEOG..HANGUL SYLLABLE HWEOH +D6FD..D717 ; LVT # Lo [27] HANGUL SYLLABLE HWEG..HANGUL SYLLABLE HWEH +D719..D733 ; LVT # Lo [27] HANGUL SYLLABLE HWIG..HANGUL SYLLABLE HWIH +D735..D74F ; LVT # Lo [27] HANGUL SYLLABLE HYUG..HANGUL SYLLABLE HYUH +D751..D76B ; LVT # Lo [27] HANGUL SYLLABLE HEUG..HANGUL SYLLABLE HEUH +D76D..D787 ; LVT # Lo [27] HANGUL SYLLABLE HYIG..HANGUL SYLLABLE HYIH +D789..D7A3 ; LVT # Lo [27] HANGUL SYLLABLE HIG..HANGUL SYLLABLE HIH + +# Total code points: 10773 + +# ================================================ + +200D ; ZWJ # Cf ZERO WIDTH JOINER + +# Total code points: 1 + +# EOF diff --git a/libcxx/utils/data/unicode/GraphemeBreakTest.txt b/libcxx/utils/data/unicode/GraphemeBreakTest.txt new file mode 100644 --- /dev/null +++ b/libcxx/utils/data/unicode/GraphemeBreakTest.txt @@ -0,0 +1,630 @@ +# GraphemeBreakTest-14.0.0.txt +# Date: 2021-03-08, 06:22:32 GMT +# © 2021 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. 
in the U.S. and other countries. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see http://www.unicode.org/reports/tr44/ +# +# Default Grapheme_Cluster_Break Test +# +# Format: +# (# )? +# contains hex Unicode code points, with +# ÷ wherever there is a break opportunity, and +# × wherever there is not. +# the format can change, but currently it shows: +# - the sample character name +# - (x) the Grapheme_Cluster_Break property value for the sample character +# - [x] the rule that determines whether there is a break or not, +# as listed in the Rules section of GraphemeBreakTest.html +# +# These samples may be extended or changed in the future. +# +÷ 0020 ÷ 0020 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 0020 × 0308 ÷ 0020 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 0020 ÷ 000D ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] (CR) ÷ [0.3] +÷ 0020 × 0308 ÷ 000D ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3] +÷ 0020 ÷ 000A ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] (LF) ÷ [0.3] +÷ 0020 × 0308 ÷ 000A ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3] +÷ 0020 ÷ 0001 ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] (Control) ÷ [0.3] +÷ 0020 × 0308 ÷ 0001 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3] +÷ 0020 × 034F ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 0020 × 0308 × 034F ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 0020 ÷ 1F1E6 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0020 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0020 ÷ 0600 ÷ # ÷ [0.2] SPACE 
(Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 0020 × 0308 ÷ 0600 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 0020 × 0903 ÷ # ÷ [0.2] SPACE (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0020 × 0308 × 0903 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0020 ÷ 1100 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0020 × 0308 ÷ 1100 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0020 ÷ 1160 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0020 × 0308 ÷ 1160 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0020 ÷ 11A8 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0020 × 0308 ÷ 11A8 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0020 ÷ AC00 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0020 × 0308 ÷ AC00 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0020 ÷ AC01 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0020 × 0308 ÷ AC01 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0020 ÷ 231A ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0020 × 0308 ÷ 231A ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0020 × 0300 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 0020 × 0308 × 0300 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT 
(Extend_ExtCccZwj) ÷ [0.3] +÷ 0020 × 200D ÷ # ÷ [0.2] SPACE (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 0020 × 0308 × 200D ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 0020 ÷ 0378 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] (Other) ÷ [0.3] +÷ 0020 × 0308 ÷ 0378 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3] +÷ 000D ÷ 0020 ÷ # ÷ [0.2] (CR) ÷ [4.0] SPACE (Other) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 000D ÷ 000D ÷ # ÷ [0.2] (CR) ÷ [4.0] (CR) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 000D ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3] +÷ 000D × 000A ÷ # ÷ [0.2] (CR) × [3.0] (LF) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 000A ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3] +÷ 000D ÷ 0001 ÷ # ÷ [0.2] (CR) ÷ [4.0] (Control) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3] +÷ 000D ÷ 034F ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 000D ÷ 0308 × 034F ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 000D ÷ 1F1E6 ÷ # ÷ [0.2] (CR) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 000D ÷ 0600 ÷ # ÷ [0.2] (CR) ÷ [4.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 0600 ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 000D ÷ 0903 ÷ # ÷ [0.2] (CR) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 000D ÷ 0308 × 0903 ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA 
(SpacingMark) ÷ [0.3] +÷ 000D ÷ 1100 ÷ # ÷ [0.2] (CR) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 000D ÷ 1160 ÷ # ÷ [0.2] (CR) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 000D ÷ 11A8 ÷ # ÷ [0.2] (CR) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 000D ÷ AC00 ÷ # ÷ [0.2] (CR) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 000D ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 000D ÷ AC01 ÷ # ÷ [0.2] (CR) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 000D ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 000D ÷ 231A ÷ # ÷ [0.2] (CR) ÷ [4.0] WATCH (ExtPict) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 231A ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 000D ÷ 0300 ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 000D ÷ 0308 × 0300 ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 000D ÷ 200D ÷ # ÷ [0.2] (CR) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 000D ÷ 0308 × 200D ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 000D ÷ 0378 ÷ # ÷ [0.2] (CR) ÷ [4.0] (Other) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3] +÷ 000A ÷ 0020 ÷ # ÷ [0.2] (LF) ÷ [4.0] SPACE (Other) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) 
÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 000A ÷ 000D ÷ # ÷ [0.2] (LF) ÷ [4.0] (CR) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 000D ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3] +÷ 000A ÷ 000A ÷ # ÷ [0.2] (LF) ÷ [4.0] (LF) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 000A ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3] +÷ 000A ÷ 0001 ÷ # ÷ [0.2] (LF) ÷ [4.0] (Control) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3] +÷ 000A ÷ 034F ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 000A ÷ 0308 × 034F ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 000A ÷ 1F1E6 ÷ # ÷ [0.2] (LF) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 000A ÷ 0600 ÷ # ÷ [0.2] (LF) ÷ [4.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 0600 ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 000A ÷ 0903 ÷ # ÷ [0.2] (LF) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 000A ÷ 0308 × 0903 ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 000A ÷ 1100 ÷ # ÷ [0.2] (LF) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 000A ÷ 1160 ÷ # ÷ [0.2] (LF) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 000A ÷ 11A8 ÷ # ÷ [0.2] (LF) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] 
HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 000A ÷ AC00 ÷ # ÷ [0.2] (LF) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 000A ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 000A ÷ AC01 ÷ # ÷ [0.2] (LF) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 000A ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 000A ÷ 231A ÷ # ÷ [0.2] (LF) ÷ [4.0] WATCH (ExtPict) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 231A ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 000A ÷ 0300 ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 000A ÷ 0308 × 0300 ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 000A ÷ 200D ÷ # ÷ [0.2] (LF) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 000A ÷ 0308 × 200D ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 000A ÷ 0378 ÷ # ÷ [0.2] (LF) ÷ [4.0] (Other) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3] +÷ 0001 ÷ 0020 ÷ # ÷ [0.2] (Control) ÷ [4.0] SPACE (Other) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 0001 ÷ 000D ÷ # ÷ [0.2] (Control) ÷ [4.0] (CR) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ 000D ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3] +÷ 0001 ÷ 000A ÷ # ÷ [0.2] (Control) ÷ [4.0] (LF) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ 000A ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3] +÷ 0001 ÷ 0001 ÷ # ÷ [0.2] (Control) ÷ [4.0] (Control) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3] +÷ 0001 ÷ 034F ÷ # ÷ [0.2] (Control) ÷ [4.0] 
COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 0001 ÷ 0308 × 034F ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 0001 ÷ 1F1E6 ÷ # ÷ [0.2] (Control) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0001 ÷ 0600 ÷ # ÷ [0.2] (Control) ÷ [4.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ 0600 ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 0001 ÷ 0903 ÷ # ÷ [0.2] (Control) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0001 ÷ 0308 × 0903 ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0001 ÷ 1100 ÷ # ÷ [0.2] (Control) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0001 ÷ 1160 ÷ # ÷ [0.2] (Control) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0001 ÷ 11A8 ÷ # ÷ [0.2] (Control) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0001 ÷ AC00 ÷ # ÷ [0.2] (Control) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0001 ÷ AC01 ÷ # ÷ [0.2] (Control) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0001 ÷ 231A ÷ # ÷ [0.2] (Control) ÷ 
[4.0] WATCH (ExtPict) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ 231A ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0001 ÷ 0300 ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 0001 ÷ 0308 × 0300 ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 0001 ÷ 200D ÷ # ÷ [0.2] (Control) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 0001 ÷ 0308 × 200D ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 0001 ÷ 0378 ÷ # ÷ [0.2] (Control) ÷ [4.0] (Other) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3] +÷ 034F ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 034F × 0308 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 034F ÷ 000D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [5.0] (CR) ÷ [0.3] +÷ 034F × 0308 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3] +÷ 034F ÷ 000A ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [5.0] (LF) ÷ [0.3] +÷ 034F × 0308 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3] +÷ 034F ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [5.0] (Control) ÷ [0.3] +÷ 034F × 0308 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3] +÷ 034F × 034F ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 034F × 0308 × 034F ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 
034F ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 034F × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 034F ÷ 0600 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 034F × 0308 ÷ 0600 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 034F × 0903 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 034F × 0308 × 0903 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 034F ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 034F × 0308 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 034F ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 034F × 0308 ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 034F ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 034F × 0308 ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 034F ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 034F × 0308 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 034F ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL 
SYLLABLE GAG (LVT) ÷ [0.3] +÷ 034F × 0308 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 034F ÷ 231A ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 034F × 0308 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 034F × 0300 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 034F × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 034F × 200D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 034F × 0308 × 200D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 034F ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] (Other) ÷ [0.3] +÷ 034F × 0308 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3] +÷ 1F1E6 ÷ 0020 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 0020 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 1F1E6 ÷ 000D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [5.0] (CR) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 000D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3] +÷ 1F1E6 ÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [5.0] (LF) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3] +÷ 1F1E6 ÷ 0001 ÷ # ÷ [0.2] REGIONAL INDICATOR 
SYMBOL LETTER A (RI) ÷ [5.0] (Control) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 0001 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3] +÷ 1F1E6 × 034F ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 1F1E6 × 0308 × 034F ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 1F1E6 × 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [12.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 1F1E6 ÷ 0600 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 0600 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 1F1E6 × 0903 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 1F1E6 × 0308 × 0903 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 1F1E6 ÷ 1100 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 1100 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 1F1E6 ÷ 1160 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 1160 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 1F1E6 ÷ 11A8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER 
A (RI) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 11A8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 1F1E6 ÷ AC00 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ AC00 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 1F1E6 ÷ AC01 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ AC01 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 1F1E6 ÷ 231A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 231A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 1F1E6 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 1F1E6 × 0308 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 1F1E6 × 200D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 1F1E6 × 0308 × 200D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 1F1E6 ÷ 0378 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] (Other) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 0378 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3] +÷ 0600 × 0020 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] SPACE (Other) ÷ [0.3] +÷ 0600 × 0308 ÷ 0020 ÷ # ÷ [0.2] ARABIC 
NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 0600 ÷ 000D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) ÷ [5.0] (CR) ÷ [0.3] +÷ 0600 × 0308 ÷ 000D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3] +÷ 0600 ÷ 000A ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) ÷ [5.0] (LF) ÷ [0.3] +÷ 0600 × 0308 ÷ 000A ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3] +÷ 0600 ÷ 0001 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) ÷ [5.0] (Control) ÷ [0.3] +÷ 0600 × 0308 ÷ 0001 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3] +÷ 0600 × 034F ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 0600 × 0308 × 034F ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 0600 × 1F1E6 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0600 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0600 × 0600 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 0600 × 0308 ÷ 0600 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 0600 × 0903 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0600 × 0308 × 0903 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0600 × 1100 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0600 × 0308 ÷ 1100 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING 
DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0600 × 1160 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0600 × 0308 ÷ 1160 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0600 × 11A8 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0600 × 0308 ÷ 11A8 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0600 × AC00 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0600 × 0308 ÷ AC00 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0600 × AC01 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0600 × 0308 ÷ AC01 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0600 × 231A ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] WATCH (ExtPict) ÷ [0.3] +÷ 0600 × 0308 ÷ 231A ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0600 × 0300 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 0600 × 0308 × 0300 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 0600 × 200D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 0600 × 0308 × 200D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 0600 × 0378 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] (Other) ÷ [0.3] +÷ 0600 × 0308 ÷ 0378 ÷ # ÷ [0.2] ARABIC NUMBER SIGN 
(Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3] +÷ 0903 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 0903 × 0308 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 0903 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] (CR) ÷ [0.3] +÷ 0903 × 0308 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3] +÷ 0903 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] (LF) ÷ [0.3] +÷ 0903 × 0308 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3] +÷ 0903 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] (Control) ÷ [0.3] +÷ 0903 × 0308 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3] +÷ 0903 × 034F ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 0903 × 0308 × 034F ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 0903 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0903 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0903 ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 0903 × 0308 ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 0903 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ 
[0.3] +÷ 0903 × 0308 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0903 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0903 × 0308 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0903 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0903 × 0308 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0903 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0903 × 0308 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0903 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0903 × 0308 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0903 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0903 × 0308 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0903 ÷ 231A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0903 × 0308 ÷ 231A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0903 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 0903 × 0308 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] 
COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 0903 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 0903 × 0308 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 0903 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] (Other) ÷ [0.3] +÷ 0903 × 0308 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3] +÷ 1100 ÷ 0020 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 1100 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 1100 ÷ 000D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] (CR) ÷ [0.3] +÷ 1100 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3] +÷ 1100 ÷ 000A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] (LF) ÷ [0.3] +÷ 1100 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3] +÷ 1100 ÷ 0001 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] (Control) ÷ [0.3] +÷ 1100 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3] +÷ 1100 × 034F ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 1100 × 0308 × 034F ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 1100 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 1100 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A 
(RI) ÷ [0.3] +÷ 1100 ÷ 0600 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 1100 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 1100 × 0903 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 1100 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 1100 × 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 1100 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 1100 × 1160 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 1100 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 1100 ÷ 11A8 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 1100 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 1100 × AC00 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 1100 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 1100 × AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 1100 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 1100 ÷ 231A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 1100 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH 
(ExtPict) ÷ [0.3] +÷ 1100 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 1100 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 1100 × 200D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 1100 × 0308 × 200D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 1100 ÷ 0378 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] (Other) ÷ [0.3] +÷ 1100 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3] +÷ 1160 ÷ 0020 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 1160 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 1160 ÷ 000D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] (CR) ÷ [0.3] +÷ 1160 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3] +÷ 1160 ÷ 000A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] (LF) ÷ [0.3] +÷ 1160 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3] +÷ 1160 ÷ 0001 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] (Control) ÷ [0.3] +÷ 1160 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3] +÷ 1160 × 034F ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 1160 × 0308 × 034F ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 1160 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) 
÷ [0.3] +÷ 1160 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 1160 ÷ 0600 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 1160 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 1160 × 0903 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 1160 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 1160 ÷ 1100 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 1160 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 1160 × 1160 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [7.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 1160 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 1160 × 11A8 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 1160 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 1160 ÷ AC00 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 1160 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 1160 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 1160 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 1160 ÷ 231A ÷ # ÷ [0.2] 
HANGUL JUNGSEONG FILLER (V) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 1160 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 1160 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 1160 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 1160 × 200D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 1160 × 0308 × 200D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 1160 ÷ 0378 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] (Other) ÷ [0.3] +÷ 1160 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3] +÷ 11A8 ÷ 0020 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 11A8 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 11A8 ÷ 000D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] (CR) ÷ [0.3] +÷ 11A8 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3] +÷ 11A8 ÷ 000A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] (LF) ÷ [0.3] +÷ 11A8 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3] +÷ 11A8 ÷ 0001 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] (Control) ÷ [0.3] +÷ 11A8 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3] +÷ 11A8 × 034F ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 11A8 × 0308 × 034F ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] 
COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 11A8 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 11A8 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 11A8 ÷ 0600 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 11A8 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 11A8 × 0903 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 11A8 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 11A8 ÷ 1100 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 11A8 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 11A8 ÷ 1160 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 11A8 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 11A8 × 11A8 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 11A8 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 11A8 ÷ AC00 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 11A8 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 11A8 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GAG 
(LVT) ÷ [0.3] +÷ 11A8 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 11A8 ÷ 231A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 11A8 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 11A8 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 11A8 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 11A8 × 200D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 11A8 × 0308 × 200D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 11A8 ÷ 0378 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] (Other) ÷ [0.3] +÷ 11A8 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3] +÷ AC00 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ AC00 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ AC00 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] (CR) ÷ [0.3] +÷ AC00 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3] +÷ AC00 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] (LF) ÷ [0.3] +÷ AC00 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3] +÷ AC00 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] (Control) ÷ [0.3] +÷ AC00 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3] +÷ AC00 × 034F ÷ 
# ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ AC00 × 0308 × 034F ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ AC00 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ AC00 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ AC00 ÷ 0600 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ AC00 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ AC00 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ AC00 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ AC00 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ AC00 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ AC00 × 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ AC00 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ AC00 × 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ AC00 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ AC00 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ AC00 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) 
÷ [0.3] +÷ AC00 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ AC00 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ AC00 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ AC00 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ AC00 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ AC00 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ AC00 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ AC00 × 0308 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ AC00 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] (Other) ÷ [0.3] +÷ AC00 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3] +÷ AC01 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ AC01 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ AC01 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] (CR) ÷ [0.3] +÷ AC01 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3] +÷ AC01 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] (LF) ÷ [0.3] +÷ AC01 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3] +÷ AC01 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] (Control) ÷ [0.3] +÷ AC01 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING 
DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3] +÷ AC01 × 034F ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ AC01 × 0308 × 034F ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ AC01 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ AC01 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ AC01 ÷ 0600 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ AC01 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ AC01 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ AC01 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ AC01 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ AC01 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ AC01 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ AC01 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ AC01 × 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ AC01 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ AC01 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ AC01 × 0308 ÷ AC00 ÷ # ÷ [0.2] 
HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ AC01 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ AC01 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ AC01 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ AC01 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ AC01 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ AC01 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ AC01 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ AC01 × 0308 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ AC01 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] (Other) ÷ [0.3] +÷ AC01 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3] +÷ 231A ÷ 0020 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 231A × 0308 ÷ 0020 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 231A ÷ 000D ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [5.0] (CR) ÷ [0.3] +÷ 231A × 0308 ÷ 000D ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3] +÷ 231A ÷ 000A ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [5.0] (LF) ÷ [0.3] +÷ 231A × 0308 ÷ 000A ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3] +÷ 231A ÷ 0001 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [5.0] (Control) ÷ [0.3] +÷ 231A × 0308 ÷ 0001 ÷ 
# ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3] +÷ 231A × 034F ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 231A × 0308 × 034F ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 231A ÷ 1F1E6 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 231A × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 231A ÷ 0600 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 231A × 0308 ÷ 0600 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 231A × 0903 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 231A × 0308 × 0903 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 231A ÷ 1100 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 231A × 0308 ÷ 1100 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 231A ÷ 1160 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 231A × 0308 ÷ 1160 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 231A ÷ 11A8 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 231A × 0308 ÷ 11A8 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 231A ÷ AC00 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 231A × 0308 ÷ AC00 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] 
+÷ 231A ÷ AC01 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 231A × 0308 ÷ AC01 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 231A ÷ 231A ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 231A × 0308 ÷ 231A ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 231A × 0300 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 231A × 0308 × 0300 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 231A × 200D ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 231A × 0308 × 200D ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 231A ÷ 0378 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] (Other) ÷ [0.3] +÷ 231A × 0308 ÷ 0378 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3] +÷ 0300 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 0300 × 0308 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 0300 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3] +÷ 0300 × 0308 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3] +÷ 0300 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3] +÷ 0300 × 0308 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3] +÷ 0300 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3] +÷ 0300 × 0308 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT 
(Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3] +÷ 0300 × 034F ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 0300 × 0308 × 034F ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 0300 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0300 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0300 ÷ 0600 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 0300 × 0308 ÷ 0600 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 0300 × 0903 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0300 × 0308 × 0903 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0300 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0300 × 0308 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0300 ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0300 × 0308 ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0300 ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0300 × 0308 ÷ 11A8 ÷ # ÷ [0.2] 
COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0300 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0300 × 0308 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0300 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0300 × 0308 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0300 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0300 × 0308 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0300 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 0300 × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 0300 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 0300 × 0308 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 0300 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3] +÷ 0300 × 0308 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3] +÷ 200D ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 200D × 0308 ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS 
(Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 200D ÷ 000D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3] +÷ 200D × 0308 ÷ 000D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3] +÷ 200D ÷ 000A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3] +÷ 200D × 0308 ÷ 000A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3] +÷ 200D ÷ 0001 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3] +÷ 200D × 0308 ÷ 0001 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3] +÷ 200D × 034F ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 200D × 0308 × 034F ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 200D ÷ 1F1E6 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 200D × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 200D ÷ 0600 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 200D × 0308 ÷ 0600 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 200D × 0903 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 200D × 0308 × 0903 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 200D ÷ 1100 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 200D × 0308 ÷ 1100 ÷ # ÷ [0.2] ZERO WIDTH JOINER 
(ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 200D ÷ 1160 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 200D × 0308 ÷ 1160 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 200D ÷ 11A8 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 200D × 0308 ÷ 11A8 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 200D ÷ AC00 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 200D × 0308 ÷ AC00 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 200D ÷ AC01 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 200D × 0308 ÷ AC01 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 200D ÷ 231A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 200D × 0308 ÷ 231A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 200D × 0300 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 200D × 0308 × 0300 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 200D × 200D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 200D × 0308 × 200D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 200D ÷ 0378 ÷ # 
÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3] +÷ 200D × 0308 ÷ 0378 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3] +÷ 0378 ÷ 0020 ÷ # ÷ [0.2] (Other) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 0378 × 0308 ÷ 0020 ÷ # ÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 0378 ÷ 000D ÷ # ÷ [0.2] (Other) ÷ [5.0] (CR) ÷ [0.3] +÷ 0378 × 0308 ÷ 000D ÷ # ÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3] +÷ 0378 ÷ 000A ÷ # ÷ [0.2] (Other) ÷ [5.0] (LF) ÷ [0.3] +÷ 0378 × 0308 ÷ 000A ÷ # ÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3] +÷ 0378 ÷ 0001 ÷ # ÷ [0.2] (Other) ÷ [5.0] (Control) ÷ [0.3] +÷ 0378 × 0308 ÷ 0001 ÷ # ÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3] +÷ 0378 × 034F ÷ # ÷ [0.2] (Other) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 0378 × 0308 × 034F ÷ # ÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 0378 ÷ 1F1E6 ÷ # ÷ [0.2] (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0378 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0378 ÷ 0600 ÷ # ÷ [0.2] (Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 0378 × 0308 ÷ 0600 ÷ # ÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 0378 × 0903 ÷ # ÷ [0.2] (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0378 × 0308 × 0903 ÷ # ÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0378 ÷ 1100 ÷ # ÷ [0.2] (Other) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0378 × 0308 ÷ 1100 ÷ # ÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ 
[999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0378 ÷ 1160 ÷ # ÷ [0.2] (Other) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0378 × 0308 ÷ 1160 ÷ # ÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0378 ÷ 11A8 ÷ # ÷ [0.2] (Other) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0378 × 0308 ÷ 11A8 ÷ # ÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0378 ÷ AC00 ÷ # ÷ [0.2] (Other) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0378 × 0308 ÷ AC00 ÷ # ÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0378 ÷ AC01 ÷ # ÷ [0.2] (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0378 × 0308 ÷ AC01 ÷ # ÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0378 ÷ 231A ÷ # ÷ [0.2] (Other) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0378 × 0308 ÷ 231A ÷ # ÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0378 × 0300 ÷ # ÷ [0.2] (Other) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 0378 × 0308 × 0300 ÷ # ÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 0378 × 200D ÷ # ÷ [0.2] (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 0378 × 0308 × 200D ÷ # ÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 0378 ÷ 0378 ÷ # ÷ [0.2] (Other) ÷ [999.0] (Other) ÷ [0.3] +÷ 0378 × 0308 ÷ 0378 ÷ # ÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3] +÷ 000D × 000A ÷ 0061 ÷ 000A ÷ 0308 ÷ # ÷ [0.2] (CR) × [3.0] (LF) ÷ [4.0] LATIN SMALL LETTER A (Other) ÷ [5.0] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [0.3] +÷ 0061 × 0308 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] COMBINING DIAERESIS 
(Extend_ExtCccZwj) ÷ [0.3] +÷ 0020 × 200D ÷ 0646 ÷ # ÷ [0.2] SPACE (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] ARABIC LETTER NOON (Other) ÷ [0.3] +÷ 0646 × 200D ÷ 0020 ÷ # ÷ [0.2] ARABIC LETTER NOON (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 1100 × 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ AC00 × 11A8 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ AC01 × 11A8 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 1F1E6 × 1F1E7 ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [12.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3] +÷ 0061 ÷ 1F1E6 × 1F1E7 ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3] +÷ 0061 ÷ 1F1E6 × 1F1E7 × 200D ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3] +÷ 0061 ÷ 1F1E6 × 200D ÷ 1F1E7 × 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3] +÷ 0061 ÷ 1F1E6 × 1F1E7 ÷ 1F1E8 × 1F1E9 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR 
SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER D (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3] +÷ 0061 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 0061 × 0308 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3] +÷ 0061 × 0903 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3] +÷ 0061 ÷ 0600 × 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) × [9.2] LATIN SMALL LETTER B (Other) ÷ [0.3] +÷ 1F476 × 1F3FF ÷ 1F476 ÷ # ÷ [0.2] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [999.0] BABY (ExtPict) ÷ [0.3] +÷ 0061 × 1F3FF ÷ 1F476 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [999.0] BABY (ExtPict) ÷ [0.3] +÷ 0061 × 1F3FF ÷ 1F476 × 200D × 1F6D1 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [999.0] BABY (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3] +÷ 1F476 × 1F3FF × 0308 × 200D × 1F476 × 1F3FF ÷ # ÷ [0.2] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [0.3] +÷ 1F6D1 × 200D × 1F6D1 ÷ # ÷ [0.2] OCTAGONAL SIGN (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3] +÷ 0061 × 200D ÷ 1F6D1 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3] +÷ 2701 × 200D × 2701 ÷ # ÷ [0.2] UPPER BLADE SCISSORS (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] UPPER BLADE SCISSORS (Other) ÷ [0.3] 
+÷ 0061 × 200D ÷ 2701 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] UPPER BLADE SCISSORS (Other) ÷ [0.3] +# +# Lines: 602 +# +# EOF diff --git a/libcxx/utils/data/unicode/README.txt b/libcxx/utils/data/unicode/README.txt new file mode 100644 --- /dev/null +++ b/libcxx/utils/data/unicode/README.txt @@ -0,0 +1,24 @@ +Contains various Unicode data files used in the library for Unicode support + +To update all files to the last published Unicode version issue the following +command in the directory containing this file. + +wget \ + https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt \ + https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt \ + https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt + +Afterwards build the `libcxx-generate-files` target to update the generated +Unicode files. + +GraphemeBreakProperty.txt +Source: https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt +Usage: libcxx/utils/generate_extended_grapheme_cluster_table.py + +emoji-data.txt +Source: https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt +Usage: libcxx/utils/generate_extended_grapheme_cluster_table.py + +GraphemeBreakTest.txt +Source: https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt +Usage: libcxx/utils/generate_extended_grapheme_cluster_test.py diff --git a/libcxx/utils/data/unicode/emoji-data.txt b/libcxx/utils/data/unicode/emoji-data.txt new file mode 100644 --- /dev/null +++ b/libcxx/utils/data/unicode/emoji-data.txt @@ -0,0 +1,1297 @@ +# emoji-data-14.0.0.txt +# Date: 2021-08-26, 17:22:22 GMT +# © 2021 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
+# For terms of use, see http://www.unicode.org/terms_of_use.html +# +# Emoji Data for UTS #51 +# Used with Emoji Version 14.0 and subsequent minor revisions (if any) +# +# For documentation and usage, see http://www.unicode.org/reports/tr51 +# +# Format: +# ; # +# Note: there is no guarantee as to the structure of whitespace or comments +# +# Characters and sequences are listed in code point order. Users should be shown a more natural order. +# See the CLDR collation order for Emoji. + + +# ================================================ + +# All omitted code points have Emoji=No +# @missing: 0000..10FFFF ; Emoji ; No + +0023 ; Emoji # E0.0 [1] (#️) hash sign +002A ; Emoji # E0.0 [1] (*️) asterisk +0030..0039 ; Emoji # E0.0 [10] (0️..9️) digit zero..digit nine +00A9 ; Emoji # E0.6 [1] (©️) copyright +00AE ; Emoji # E0.6 [1] (®️) registered +203C ; Emoji # E0.6 [1] (‼️) double exclamation mark +2049 ; Emoji # E0.6 [1] (⁉️) exclamation question mark +2122 ; Emoji # E0.6 [1] (™️) trade mark +2139 ; Emoji # E0.6 [1] (ℹ️) information +2194..2199 ; Emoji # E0.6 [6] (↔️..↙️) left-right arrow..down-left arrow +21A9..21AA ; Emoji # E0.6 [2] (↩️..↪️) right arrow curving left..left arrow curving right +231A..231B ; Emoji # E0.6 [2] (⌚..⌛) watch..hourglass done +2328 ; Emoji # E1.0 [1] (⌨️) keyboard +23CF ; Emoji # E1.0 [1] (⏏️) eject button +23E9..23EC ; Emoji # E0.6 [4] (⏩..⏬) fast-forward button..fast down button +23ED..23EE ; Emoji # E0.7 [2] (⏭️..⏮️) next track button..last track button +23EF ; Emoji # E1.0 [1] (⏯️) play or pause button +23F0 ; Emoji # E0.6 [1] (⏰) alarm clock +23F1..23F2 ; Emoji # E1.0 [2] (⏱️..⏲️) stopwatch..timer clock +23F3 ; Emoji # E0.6 [1] (⏳) hourglass not done +23F8..23FA ; Emoji # E0.7 [3] (⏸️..⏺️) pause button..record button +24C2 ; Emoji # E0.6 [1] (Ⓜ️) circled M +25AA..25AB ; Emoji # E0.6 [2] (▪️..▫️) black small square..white small square +25B6 ; Emoji # E0.6 [1] (▶️) play button +25C0 ; Emoji # E0.6 [1] (◀️) reverse button +25FB..25FE ; 
Emoji # E0.6 [4] (◻️..◾) white medium square..black medium-small square +2600..2601 ; Emoji # E0.6 [2] (☀️..☁️) sun..cloud +2602..2603 ; Emoji # E0.7 [2] (☂️..☃️) umbrella..snowman +2604 ; Emoji # E1.0 [1] (☄️) comet +260E ; Emoji # E0.6 [1] (☎️) telephone +2611 ; Emoji # E0.6 [1] (☑️) check box with check +2614..2615 ; Emoji # E0.6 [2] (☔..☕) umbrella with rain drops..hot beverage +2618 ; Emoji # E1.0 [1] (☘️) shamrock +261D ; Emoji # E0.6 [1] (☝️) index pointing up +2620 ; Emoji # E1.0 [1] (☠️) skull and crossbones +2622..2623 ; Emoji # E1.0 [2] (☢️..☣️) radioactive..biohazard +2626 ; Emoji # E1.0 [1] (☦️) orthodox cross +262A ; Emoji # E0.7 [1] (☪️) star and crescent +262E ; Emoji # E1.0 [1] (☮️) peace symbol +262F ; Emoji # E0.7 [1] (☯️) yin yang +2638..2639 ; Emoji # E0.7 [2] (☸️..☹️) wheel of dharma..frowning face +263A ; Emoji # E0.6 [1] (☺️) smiling face +2640 ; Emoji # E4.0 [1] (♀️) female sign +2642 ; Emoji # E4.0 [1] (♂️) male sign +2648..2653 ; Emoji # E0.6 [12] (♈..♓) Aries..Pisces +265F ; Emoji # E11.0 [1] (♟️) chess pawn +2660 ; Emoji # E0.6 [1] (♠️) spade suit +2663 ; Emoji # E0.6 [1] (♣️) club suit +2665..2666 ; Emoji # E0.6 [2] (♥️..♦️) heart suit..diamond suit +2668 ; Emoji # E0.6 [1] (♨️) hot springs +267B ; Emoji # E0.6 [1] (♻️) recycling symbol +267E ; Emoji # E11.0 [1] (♾️) infinity +267F ; Emoji # E0.6 [1] (♿) wheelchair symbol +2692 ; Emoji # E1.0 [1] (⚒️) hammer and pick +2693 ; Emoji # E0.6 [1] (⚓) anchor +2694 ; Emoji # E1.0 [1] (⚔️) crossed swords +2695 ; Emoji # E4.0 [1] (⚕️) medical symbol +2696..2697 ; Emoji # E1.0 [2] (⚖️..⚗️) balance scale..alembic +2699 ; Emoji # E1.0 [1] (⚙️) gear +269B..269C ; Emoji # E1.0 [2] (⚛️..⚜️) atom symbol..fleur-de-lis +26A0..26A1 ; Emoji # E0.6 [2] (⚠️..⚡) warning..high voltage +26A7 ; Emoji # E13.0 [1] (⚧️) transgender symbol +26AA..26AB ; Emoji # E0.6 [2] (⚪..⚫) white circle..black circle +26B0..26B1 ; Emoji # E1.0 [2] (⚰️..⚱️) coffin..funeral urn +26BD..26BE ; Emoji # E0.6 [2] (⚽..⚾) soccer 
ball..baseball +26C4..26C5 ; Emoji # E0.6 [2] (⛄..⛅) snowman without snow..sun behind cloud +26C8 ; Emoji # E0.7 [1] (⛈️) cloud with lightning and rain +26CE ; Emoji # E0.6 [1] (⛎) Ophiuchus +26CF ; Emoji # E0.7 [1] (⛏️) pick +26D1 ; Emoji # E0.7 [1] (⛑️) rescue worker’s helmet +26D3 ; Emoji # E0.7 [1] (⛓️) chains +26D4 ; Emoji # E0.6 [1] (⛔) no entry +26E9 ; Emoji # E0.7 [1] (⛩️) shinto shrine +26EA ; Emoji # E0.6 [1] (⛪) church +26F0..26F1 ; Emoji # E0.7 [2] (⛰️..⛱️) mountain..umbrella on ground +26F2..26F3 ; Emoji # E0.6 [2] (⛲..⛳) fountain..flag in hole +26F4 ; Emoji # E0.7 [1] (⛴️) ferry +26F5 ; Emoji # E0.6 [1] (⛵) sailboat +26F7..26F9 ; Emoji # E0.7 [3] (⛷️..⛹️) skier..person bouncing ball +26FA ; Emoji # E0.6 [1] (⛺) tent +26FD ; Emoji # E0.6 [1] (⛽) fuel pump +2702 ; Emoji # E0.6 [1] (✂️) scissors +2705 ; Emoji # E0.6 [1] (✅) check mark button +2708..270C ; Emoji # E0.6 [5] (✈️..✌️) airplane..victory hand +270D ; Emoji # E0.7 [1] (✍️) writing hand +270F ; Emoji # E0.6 [1] (✏️) pencil +2712 ; Emoji # E0.6 [1] (✒️) black nib +2714 ; Emoji # E0.6 [1] (✔️) check mark +2716 ; Emoji # E0.6 [1] (✖️) multiply +271D ; Emoji # E0.7 [1] (✝️) latin cross +2721 ; Emoji # E0.7 [1] (✡️) star of David +2728 ; Emoji # E0.6 [1] (✨) sparkles +2733..2734 ; Emoji # E0.6 [2] (✳️..✴️) eight-spoked asterisk..eight-pointed star +2744 ; Emoji # E0.6 [1] (❄️) snowflake +2747 ; Emoji # E0.6 [1] (❇️) sparkle +274C ; Emoji # E0.6 [1] (❌) cross mark +274E ; Emoji # E0.6 [1] (❎) cross mark button +2753..2755 ; Emoji # E0.6 [3] (❓..❕) red question mark..white exclamation mark +2757 ; Emoji # E0.6 [1] (❗) red exclamation mark +2763 ; Emoji # E1.0 [1] (❣️) heart exclamation +2764 ; Emoji # E0.6 [1] (❤️) red heart +2795..2797 ; Emoji # E0.6 [3] (➕..➗) plus..divide +27A1 ; Emoji # E0.6 [1] (➡️) right arrow +27B0 ; Emoji # E0.6 [1] (➰) curly loop +27BF ; Emoji # E1.0 [1] (➿) double curly loop +2934..2935 ; Emoji # E0.6 [2] (⤴️..⤵️) right arrow curving up..right arrow curving down +2B05..2B07 ; 
Emoji # E0.6 [3] (⬅️..⬇️) left arrow..down arrow +2B1B..2B1C ; Emoji # E0.6 [2] (⬛..⬜) black large square..white large square +2B50 ; Emoji # E0.6 [1] (⭐) star +2B55 ; Emoji # E0.6 [1] (⭕) hollow red circle +3030 ; Emoji # E0.6 [1] (〰️) wavy dash +303D ; Emoji # E0.6 [1] (〽️) part alternation mark +3297 ; Emoji # E0.6 [1] (㊗️) Japanese “congratulations” button +3299 ; Emoji # E0.6 [1] (㊙️) Japanese “secret” button +1F004 ; Emoji # E0.6 [1] (🀄) mahjong red dragon +1F0CF ; Emoji # E0.6 [1] (🃏) joker +1F170..1F171 ; Emoji # E0.6 [2] (🅰️..🅱️) A button (blood type)..B button (blood type) +1F17E..1F17F ; Emoji # E0.6 [2] (🅾️..🅿️) O button (blood type)..P button +1F18E ; Emoji # E0.6 [1] (🆎) AB button (blood type) +1F191..1F19A ; Emoji # E0.6 [10] (🆑..🆚) CL button..VS button +1F1E6..1F1FF ; Emoji # E0.0 [26] (🇦..🇿) regional indicator symbol letter a..regional indicator symbol letter z +1F201..1F202 ; Emoji # E0.6 [2] (🈁..🈂️) Japanese “here” button..Japanese “service charge” button +1F21A ; Emoji # E0.6 [1] (🈚) Japanese “free of charge” button +1F22F ; Emoji # E0.6 [1] (🈯) Japanese “reserved” button +1F232..1F23A ; Emoji # E0.6 [9] (🈲..🈺) Japanese “prohibited” button..Japanese “open for business” button +1F250..1F251 ; Emoji # E0.6 [2] (🉐..🉑) Japanese “bargain” button..Japanese “acceptable” button +1F300..1F30C ; Emoji # E0.6 [13] (🌀..🌌) cyclone..milky way +1F30D..1F30E ; Emoji # E0.7 [2] (🌍..🌎) globe showing Europe-Africa..globe showing Americas +1F30F ; Emoji # E0.6 [1] (🌏) globe showing Asia-Australia +1F310 ; Emoji # E1.0 [1] (🌐) globe with meridians +1F311 ; Emoji # E0.6 [1] (🌑) new moon +1F312 ; Emoji # E1.0 [1] (🌒) waxing crescent moon +1F313..1F315 ; Emoji # E0.6 [3] (🌓..🌕) first quarter moon..full moon +1F316..1F318 ; Emoji # E1.0 [3] (🌖..🌘) waning gibbous moon..waning crescent moon +1F319 ; Emoji # E0.6 [1] (🌙) crescent moon +1F31A ; Emoji # E1.0 [1] (🌚) new moon face +1F31B ; Emoji # E0.6 [1] (🌛) first quarter moon face +1F31C ; Emoji # E0.7 [1] (🌜) last quarter 
moon face +1F31D..1F31E ; Emoji # E1.0 [2] (🌝..🌞) full moon face..sun with face +1F31F..1F320 ; Emoji # E0.6 [2] (🌟..🌠) glowing star..shooting star +1F321 ; Emoji # E0.7 [1] (🌡️) thermometer +1F324..1F32C ; Emoji # E0.7 [9] (🌤️..🌬️) sun behind small cloud..wind face +1F32D..1F32F ; Emoji # E1.0 [3] (🌭..🌯) hot dog..burrito +1F330..1F331 ; Emoji # E0.6 [2] (🌰..🌱) chestnut..seedling +1F332..1F333 ; Emoji # E1.0 [2] (🌲..🌳) evergreen tree..deciduous tree +1F334..1F335 ; Emoji # E0.6 [2] (🌴..🌵) palm tree..cactus +1F336 ; Emoji # E0.7 [1] (🌶️) hot pepper +1F337..1F34A ; Emoji # E0.6 [20] (🌷..🍊) tulip..tangerine +1F34B ; Emoji # E1.0 [1] (🍋) lemon +1F34C..1F34F ; Emoji # E0.6 [4] (🍌..🍏) banana..green apple +1F350 ; Emoji # E1.0 [1] (🍐) pear +1F351..1F37B ; Emoji # E0.6 [43] (🍑..🍻) peach..clinking beer mugs +1F37C ; Emoji # E1.0 [1] (🍼) baby bottle +1F37D ; Emoji # E0.7 [1] (🍽️) fork and knife with plate +1F37E..1F37F ; Emoji # E1.0 [2] (🍾..🍿) bottle with popping cork..popcorn +1F380..1F393 ; Emoji # E0.6 [20] (🎀..🎓) ribbon..graduation cap +1F396..1F397 ; Emoji # E0.7 [2] (🎖️..🎗️) military medal..reminder ribbon +1F399..1F39B ; Emoji # E0.7 [3] (🎙️..🎛️) studio microphone..control knobs +1F39E..1F39F ; Emoji # E0.7 [2] (🎞️..🎟️) film frames..admission tickets +1F3A0..1F3C4 ; Emoji # E0.6 [37] (🎠..🏄) carousel horse..person surfing +1F3C5 ; Emoji # E1.0 [1] (🏅) sports medal +1F3C6 ; Emoji # E0.6 [1] (🏆) trophy +1F3C7 ; Emoji # E1.0 [1] (🏇) horse racing +1F3C8 ; Emoji # E0.6 [1] (🏈) american football +1F3C9 ; Emoji # E1.0 [1] (🏉) rugby football +1F3CA ; Emoji # E0.6 [1] (🏊) person swimming +1F3CB..1F3CE ; Emoji # E0.7 [4] (🏋️..🏎️) person lifting weights..racing car +1F3CF..1F3D3 ; Emoji # E1.0 [5] (🏏..🏓) cricket game..ping pong +1F3D4..1F3DF ; Emoji # E0.7 [12] (🏔️..🏟️) snow-capped mountain..stadium +1F3E0..1F3E3 ; Emoji # E0.6 [4] (🏠..🏣) house..Japanese post office +1F3E4 ; Emoji # E1.0 [1] (🏤) post office +1F3E5..1F3F0 ; Emoji # E0.6 [12] (🏥..🏰) hospital..castle +1F3F3 ; Emoji 
# E0.7 [1] (🏳️) white flag +1F3F4 ; Emoji # E1.0 [1] (🏴) black flag +1F3F5 ; Emoji # E0.7 [1] (🏵️) rosette +1F3F7 ; Emoji # E0.7 [1] (🏷️) label +1F3F8..1F407 ; Emoji # E1.0 [16] (🏸..🐇) badminton..rabbit +1F408 ; Emoji # E0.7 [1] (🐈) cat +1F409..1F40B ; Emoji # E1.0 [3] (🐉..🐋) dragon..whale +1F40C..1F40E ; Emoji # E0.6 [3] (🐌..🐎) snail..horse +1F40F..1F410 ; Emoji # E1.0 [2] (🐏..🐐) ram..goat +1F411..1F412 ; Emoji # E0.6 [2] (🐑..🐒) ewe..monkey +1F413 ; Emoji # E1.0 [1] (🐓) rooster +1F414 ; Emoji # E0.6 [1] (🐔) chicken +1F415 ; Emoji # E0.7 [1] (🐕) dog +1F416 ; Emoji # E1.0 [1] (🐖) pig +1F417..1F429 ; Emoji # E0.6 [19] (🐗..🐩) boar..poodle +1F42A ; Emoji # E1.0 [1] (🐪) camel +1F42B..1F43E ; Emoji # E0.6 [20] (🐫..🐾) two-hump camel..paw prints +1F43F ; Emoji # E0.7 [1] (🐿️) chipmunk +1F440 ; Emoji # E0.6 [1] (👀) eyes +1F441 ; Emoji # E0.7 [1] (👁️) eye +1F442..1F464 ; Emoji # E0.6 [35] (👂..👤) ear..bust in silhouette +1F465 ; Emoji # E1.0 [1] (👥) busts in silhouette +1F466..1F46B ; Emoji # E0.6 [6] (👦..👫) boy..woman and man holding hands +1F46C..1F46D ; Emoji # E1.0 [2] (👬..👭) men holding hands..women holding hands +1F46E..1F4AC ; Emoji # E0.6 [63] (👮..💬) police officer..speech balloon +1F4AD ; Emoji # E1.0 [1] (💭) thought balloon +1F4AE..1F4B5 ; Emoji # E0.6 [8] (💮..💵) white flower..dollar banknote +1F4B6..1F4B7 ; Emoji # E1.0 [2] (💶..💷) euro banknote..pound banknote +1F4B8..1F4EB ; Emoji # E0.6 [52] (💸..📫) money with wings..closed mailbox with raised flag +1F4EC..1F4ED ; Emoji # E0.7 [2] (📬..📭) open mailbox with raised flag..open mailbox with lowered flag +1F4EE ; Emoji # E0.6 [1] (📮) postbox +1F4EF ; Emoji # E1.0 [1] (📯) postal horn +1F4F0..1F4F4 ; Emoji # E0.6 [5] (📰..📴) newspaper..mobile phone off +1F4F5 ; Emoji # E1.0 [1] (📵) no mobile phones +1F4F6..1F4F7 ; Emoji # E0.6 [2] (📶..📷) antenna bars..camera +1F4F8 ; Emoji # E1.0 [1] (📸) camera with flash +1F4F9..1F4FC ; Emoji # E0.6 [4] (📹..📼) video camera..videocassette +1F4FD ; Emoji # E0.7 [1] (📽️) film projector 
+1F4FF..1F502 ; Emoji # E1.0 [4] (📿..🔂) prayer beads..repeat single button +1F503 ; Emoji # E0.6 [1] (🔃) clockwise vertical arrows +1F504..1F507 ; Emoji # E1.0 [4] (🔄..🔇) counterclockwise arrows button..muted speaker +1F508 ; Emoji # E0.7 [1] (🔈) speaker low volume +1F509 ; Emoji # E1.0 [1] (🔉) speaker medium volume +1F50A..1F514 ; Emoji # E0.6 [11] (🔊..🔔) speaker high volume..bell +1F515 ; Emoji # E1.0 [1] (🔕) bell with slash +1F516..1F52B ; Emoji # E0.6 [22] (🔖..🔫) bookmark..water pistol +1F52C..1F52D ; Emoji # E1.0 [2] (🔬..🔭) microscope..telescope +1F52E..1F53D ; Emoji # E0.6 [16] (🔮..🔽) crystal ball..downwards button +1F549..1F54A ; Emoji # E0.7 [2] (🕉️..🕊️) om..dove +1F54B..1F54E ; Emoji # E1.0 [4] (🕋..🕎) kaaba..menorah +1F550..1F55B ; Emoji # E0.6 [12] (🕐..🕛) one o’clock..twelve o’clock +1F55C..1F567 ; Emoji # E0.7 [12] (🕜..🕧) one-thirty..twelve-thirty +1F56F..1F570 ; Emoji # E0.7 [2] (🕯️..🕰️) candle..mantelpiece clock +1F573..1F579 ; Emoji # E0.7 [7] (🕳️..🕹️) hole..joystick +1F57A ; Emoji # E3.0 [1] (🕺) man dancing +1F587 ; Emoji # E0.7 [1] (🖇️) linked paperclips +1F58A..1F58D ; Emoji # E0.7 [4] (🖊️..🖍️) pen..crayon +1F590 ; Emoji # E0.7 [1] (🖐️) hand with fingers splayed +1F595..1F596 ; Emoji # E1.0 [2] (🖕..🖖) middle finger..vulcan salute +1F5A4 ; Emoji # E3.0 [1] (🖤) black heart +1F5A5 ; Emoji # E0.7 [1] (🖥️) desktop computer +1F5A8 ; Emoji # E0.7 [1] (🖨️) printer +1F5B1..1F5B2 ; Emoji # E0.7 [2] (🖱️..🖲️) computer mouse..trackball +1F5BC ; Emoji # E0.7 [1] (🖼️) framed picture +1F5C2..1F5C4 ; Emoji # E0.7 [3] (🗂️..🗄️) card index dividers..file cabinet +1F5D1..1F5D3 ; Emoji # E0.7 [3] (🗑️..🗓️) wastebasket..spiral calendar +1F5DC..1F5DE ; Emoji # E0.7 [3] (🗜️..🗞️) clamp..rolled-up newspaper +1F5E1 ; Emoji # E0.7 [1] (🗡️) dagger +1F5E3 ; Emoji # E0.7 [1] (🗣️) speaking head +1F5E8 ; Emoji # E2.0 [1] (🗨️) left speech bubble +1F5EF ; Emoji # E0.7 [1] (🗯️) right anger bubble +1F5F3 ; Emoji # E0.7 [1] (🗳️) ballot box with ballot +1F5FA ; Emoji # E0.7 [1] (🗺️) world 
map +1F5FB..1F5FF ; Emoji # E0.6 [5] (🗻..🗿) mount fuji..moai +1F600 ; Emoji # E1.0 [1] (😀) grinning face +1F601..1F606 ; Emoji # E0.6 [6] (😁..😆) beaming face with smiling eyes..grinning squinting face +1F607..1F608 ; Emoji # E1.0 [2] (😇..😈) smiling face with halo..smiling face with horns +1F609..1F60D ; Emoji # E0.6 [5] (😉..😍) winking face..smiling face with heart-eyes +1F60E ; Emoji # E1.0 [1] (😎) smiling face with sunglasses +1F60F ; Emoji # E0.6 [1] (😏) smirking face +1F610 ; Emoji # E0.7 [1] (😐) neutral face +1F611 ; Emoji # E1.0 [1] (😑) expressionless face +1F612..1F614 ; Emoji # E0.6 [3] (😒..😔) unamused face..pensive face +1F615 ; Emoji # E1.0 [1] (😕) confused face +1F616 ; Emoji # E0.6 [1] (😖) confounded face +1F617 ; Emoji # E1.0 [1] (😗) kissing face +1F618 ; Emoji # E0.6 [1] (😘) face blowing a kiss +1F619 ; Emoji # E1.0 [1] (😙) kissing face with smiling eyes +1F61A ; Emoji # E0.6 [1] (😚) kissing face with closed eyes +1F61B ; Emoji # E1.0 [1] (😛) face with tongue +1F61C..1F61E ; Emoji # E0.6 [3] (😜..😞) winking face with tongue..disappointed face +1F61F ; Emoji # E1.0 [1] (😟) worried face +1F620..1F625 ; Emoji # E0.6 [6] (😠..😥) angry face..sad but relieved face +1F626..1F627 ; Emoji # E1.0 [2] (😦..😧) frowning face with open mouth..anguished face +1F628..1F62B ; Emoji # E0.6 [4] (😨..😫) fearful face..tired face +1F62C ; Emoji # E1.0 [1] (😬) grimacing face +1F62D ; Emoji # E0.6 [1] (😭) loudly crying face +1F62E..1F62F ; Emoji # E1.0 [2] (😮..😯) face with open mouth..hushed face +1F630..1F633 ; Emoji # E0.6 [4] (😰..😳) anxious face with sweat..flushed face +1F634 ; Emoji # E1.0 [1] (😴) sleeping face +1F635 ; Emoji # E0.6 [1] (😵) face with crossed-out eyes +1F636 ; Emoji # E1.0 [1] (😶) face without mouth +1F637..1F640 ; Emoji # E0.6 [10] (😷..🙀) face with medical mask..weary cat +1F641..1F644 ; Emoji # E1.0 [4] (🙁..🙄) slightly frowning face..face with rolling eyes +1F645..1F64F ; Emoji # E0.6 [11] (🙅..🙏) person gesturing NO..folded hands +1F680 ; Emoji # E0.6 [1] 
(🚀) rocket +1F681..1F682 ; Emoji # E1.0 [2] (🚁..🚂) helicopter..locomotive +1F683..1F685 ; Emoji # E0.6 [3] (🚃..🚅) railway car..bullet train +1F686 ; Emoji # E1.0 [1] (🚆) train +1F687 ; Emoji # E0.6 [1] (🚇) metro +1F688 ; Emoji # E1.0 [1] (🚈) light rail +1F689 ; Emoji # E0.6 [1] (🚉) station +1F68A..1F68B ; Emoji # E1.0 [2] (🚊..🚋) tram..tram car +1F68C ; Emoji # E0.6 [1] (🚌) bus +1F68D ; Emoji # E0.7 [1] (🚍) oncoming bus +1F68E ; Emoji # E1.0 [1] (🚎) trolleybus +1F68F ; Emoji # E0.6 [1] (🚏) bus stop +1F690 ; Emoji # E1.0 [1] (🚐) minibus +1F691..1F693 ; Emoji # E0.6 [3] (🚑..🚓) ambulance..police car +1F694 ; Emoji # E0.7 [1] (🚔) oncoming police car +1F695 ; Emoji # E0.6 [1] (🚕) taxi +1F696 ; Emoji # E1.0 [1] (🚖) oncoming taxi +1F697 ; Emoji # E0.6 [1] (🚗) automobile +1F698 ; Emoji # E0.7 [1] (🚘) oncoming automobile +1F699..1F69A ; Emoji # E0.6 [2] (🚙..🚚) sport utility vehicle..delivery truck +1F69B..1F6A1 ; Emoji # E1.0 [7] (🚛..🚡) articulated lorry..aerial tramway +1F6A2 ; Emoji # E0.6 [1] (🚢) ship +1F6A3 ; Emoji # E1.0 [1] (🚣) person rowing boat +1F6A4..1F6A5 ; Emoji # E0.6 [2] (🚤..🚥) speedboat..horizontal traffic light +1F6A6 ; Emoji # E1.0 [1] (🚦) vertical traffic light +1F6A7..1F6AD ; Emoji # E0.6 [7] (🚧..🚭) construction..no smoking +1F6AE..1F6B1 ; Emoji # E1.0 [4] (🚮..🚱) litter in bin sign..non-potable water +1F6B2 ; Emoji # E0.6 [1] (🚲) bicycle +1F6B3..1F6B5 ; Emoji # E1.0 [3] (🚳..🚵) no bicycles..person mountain biking +1F6B6 ; Emoji # E0.6 [1] (🚶) person walking +1F6B7..1F6B8 ; Emoji # E1.0 [2] (🚷..🚸) no pedestrians..children crossing +1F6B9..1F6BE ; Emoji # E0.6 [6] (🚹..🚾) men’s room..water closet +1F6BF ; Emoji # E1.0 [1] (🚿) shower +1F6C0 ; Emoji # E0.6 [1] (🛀) person taking bath +1F6C1..1F6C5 ; Emoji # E1.0 [5] (🛁..🛅) bathtub..left luggage +1F6CB ; Emoji # E0.7 [1] (🛋️) couch and lamp +1F6CC ; Emoji # E1.0 [1] (🛌) person in bed +1F6CD..1F6CF ; Emoji # E0.7 [3] (🛍️..🛏️) shopping bags..bed +1F6D0 ; Emoji # E1.0 [1] (🛐) place of worship +1F6D1..1F6D2 ; Emoji # 
E3.0 [2] (🛑..🛒) stop sign..shopping cart +1F6D5 ; Emoji # E12.0 [1] (🛕) hindu temple +1F6D6..1F6D7 ; Emoji # E13.0 [2] (🛖..🛗) hut..elevator +1F6DD..1F6DF ; Emoji # E14.0 [3] (🛝..🛟) playground slide..ring buoy +1F6E0..1F6E5 ; Emoji # E0.7 [6] (🛠️..🛥️) hammer and wrench..motor boat +1F6E9 ; Emoji # E0.7 [1] (🛩️) small airplane +1F6EB..1F6EC ; Emoji # E1.0 [2] (🛫..🛬) airplane departure..airplane arrival +1F6F0 ; Emoji # E0.7 [1] (🛰️) satellite +1F6F3 ; Emoji # E0.7 [1] (🛳️) passenger ship +1F6F4..1F6F6 ; Emoji # E3.0 [3] (🛴..🛶) kick scooter..canoe +1F6F7..1F6F8 ; Emoji # E5.0 [2] (🛷..🛸) sled..flying saucer +1F6F9 ; Emoji # E11.0 [1] (🛹) skateboard +1F6FA ; Emoji # E12.0 [1] (🛺) auto rickshaw +1F6FB..1F6FC ; Emoji # E13.0 [2] (🛻..🛼) pickup truck..roller skate +1F7E0..1F7EB ; Emoji # E12.0 [12] (🟠..🟫) orange circle..brown square +1F7F0 ; Emoji # E14.0 [1] (🟰) heavy equals sign +1F90C ; Emoji # E13.0 [1] (🤌) pinched fingers +1F90D..1F90F ; Emoji # E12.0 [3] (🤍..🤏) white heart..pinching hand +1F910..1F918 ; Emoji # E1.0 [9] (🤐..🤘) zipper-mouth face..sign of the horns +1F919..1F91E ; Emoji # E3.0 [6] (🤙..🤞) call me hand..crossed fingers +1F91F ; Emoji # E5.0 [1] (🤟) love-you gesture +1F920..1F927 ; Emoji # E3.0 [8] (🤠..🤧) cowboy hat face..sneezing face +1F928..1F92F ; Emoji # E5.0 [8] (🤨..🤯) face with raised eyebrow..exploding head +1F930 ; Emoji # E3.0 [1] (🤰) pregnant woman +1F931..1F932 ; Emoji # E5.0 [2] (🤱..🤲) breast-feeding..palms up together +1F933..1F93A ; Emoji # E3.0 [8] (🤳..🤺) selfie..person fencing +1F93C..1F93E ; Emoji # E3.0 [3] (🤼..🤾) people wrestling..person playing handball +1F93F ; Emoji # E12.0 [1] (🤿) diving mask +1F940..1F945 ; Emoji # E3.0 [6] (🥀..🥅) wilted flower..goal net +1F947..1F94B ; Emoji # E3.0 [5] (🥇..🥋) 1st place medal..martial arts uniform +1F94C ; Emoji # E5.0 [1] (🥌) curling stone +1F94D..1F94F ; Emoji # E11.0 [3] (🥍..🥏) lacrosse..flying disc +1F950..1F95E ; Emoji # E3.0 [15] (🥐..🥞) croissant..pancakes +1F95F..1F96B ; Emoji # E5.0 [13] 
(🥟..🥫) dumpling..canned food +1F96C..1F970 ; Emoji # E11.0 [5] (🥬..🥰) leafy green..smiling face with hearts +1F971 ; Emoji # E12.0 [1] (🥱) yawning face +1F972 ; Emoji # E13.0 [1] (🥲) smiling face with tear +1F973..1F976 ; Emoji # E11.0 [4] (🥳..🥶) partying face..cold face +1F977..1F978 ; Emoji # E13.0 [2] (🥷..🥸) ninja..disguised face +1F979 ; Emoji # E14.0 [1] (🥹) face holding back tears +1F97A ; Emoji # E11.0 [1] (🥺) pleading face +1F97B ; Emoji # E12.0 [1] (🥻) sari +1F97C..1F97F ; Emoji # E11.0 [4] (🥼..🥿) lab coat..flat shoe +1F980..1F984 ; Emoji # E1.0 [5] (🦀..🦄) crab..unicorn +1F985..1F991 ; Emoji # E3.0 [13] (🦅..🦑) eagle..squid +1F992..1F997 ; Emoji # E5.0 [6] (🦒..🦗) giraffe..cricket +1F998..1F9A2 ; Emoji # E11.0 [11] (🦘..🦢) kangaroo..swan +1F9A3..1F9A4 ; Emoji # E13.0 [2] (🦣..🦤) mammoth..dodo +1F9A5..1F9AA ; Emoji # E12.0 [6] (🦥..🦪) sloth..oyster +1F9AB..1F9AD ; Emoji # E13.0 [3] (🦫..🦭) beaver..seal +1F9AE..1F9AF ; Emoji # E12.0 [2] (🦮..🦯) guide dog..white cane +1F9B0..1F9B9 ; Emoji # E11.0 [10] (🦰..🦹) red hair..supervillain +1F9BA..1F9BF ; Emoji # E12.0 [6] (🦺..🦿) safety vest..mechanical leg +1F9C0 ; Emoji # E1.0 [1] (🧀) cheese wedge +1F9C1..1F9C2 ; Emoji # E11.0 [2] (🧁..🧂) cupcake..salt +1F9C3..1F9CA ; Emoji # E12.0 [8] (🧃..🧊) beverage box..ice +1F9CB ; Emoji # E13.0 [1] (🧋) bubble tea +1F9CC ; Emoji # E14.0 [1] (🧌) troll +1F9CD..1F9CF ; Emoji # E12.0 [3] (🧍..🧏) person standing..deaf person +1F9D0..1F9E6 ; Emoji # E5.0 [23] (🧐..🧦) face with monocle..socks +1F9E7..1F9FF ; Emoji # E11.0 [25] (🧧..🧿) red envelope..nazar amulet +1FA70..1FA73 ; Emoji # E12.0 [4] (🩰..🩳) ballet shoes..shorts +1FA74 ; Emoji # E13.0 [1] (🩴) thong sandal +1FA78..1FA7A ; Emoji # E12.0 [3] (🩸..🩺) drop of blood..stethoscope +1FA7B..1FA7C ; Emoji # E14.0 [2] (🩻..🩼) x-ray..crutch +1FA80..1FA82 ; Emoji # E12.0 [3] (🪀..🪂) yo-yo..parachute +1FA83..1FA86 ; Emoji # E13.0 [4] (🪃..🪆) boomerang..nesting dolls +1FA90..1FA95 ; Emoji # E12.0 [6] (🪐..🪕) ringed planet..banjo +1FA96..1FAA8 ; Emoji # 
E13.0 [19] (🪖..🪨) military helmet..rock +1FAA9..1FAAC ; Emoji # E14.0 [4] (🪩..🪬) mirror ball..hamsa +1FAB0..1FAB6 ; Emoji # E13.0 [7] (🪰..🪶) fly..feather +1FAB7..1FABA ; Emoji # E14.0 [4] (🪷..🪺) lotus..nest with eggs +1FAC0..1FAC2 ; Emoji # E13.0 [3] (🫀..🫂) anatomical heart..people hugging +1FAC3..1FAC5 ; Emoji # E14.0 [3] (🫃..🫅) pregnant man..person with crown +1FAD0..1FAD6 ; Emoji # E13.0 [7] (🫐..🫖) blueberries..teapot +1FAD7..1FAD9 ; Emoji # E14.0 [3] (🫗..🫙) pouring liquid..jar +1FAE0..1FAE7 ; Emoji # E14.0 [8] (🫠..🫧) melting face..bubbles +1FAF0..1FAF6 ; Emoji # E14.0 [7] (🫰..🫶) hand with index finger and thumb crossed..heart hands + +# Total elements: 1404 + +# ================================================ + +# All omitted code points have Emoji_Presentation=No +# @missing: 0000..10FFFF ; Emoji_Presentation ; No + +231A..231B ; Emoji_Presentation # E0.6 [2] (⌚..⌛) watch..hourglass done +23E9..23EC ; Emoji_Presentation # E0.6 [4] (⏩..⏬) fast-forward button..fast down button +23F0 ; Emoji_Presentation # E0.6 [1] (⏰) alarm clock +23F3 ; Emoji_Presentation # E0.6 [1] (⏳) hourglass not done +25FD..25FE ; Emoji_Presentation # E0.6 [2] (◽..◾) white medium-small square..black medium-small square +2614..2615 ; Emoji_Presentation # E0.6 [2] (☔..☕) umbrella with rain drops..hot beverage +2648..2653 ; Emoji_Presentation # E0.6 [12] (♈..♓) Aries..Pisces +267F ; Emoji_Presentation # E0.6 [1] (♿) wheelchair symbol +2693 ; Emoji_Presentation # E0.6 [1] (⚓) anchor +26A1 ; Emoji_Presentation # E0.6 [1] (⚡) high voltage +26AA..26AB ; Emoji_Presentation # E0.6 [2] (⚪..⚫) white circle..black circle +26BD..26BE ; Emoji_Presentation # E0.6 [2] (⚽..⚾) soccer ball..baseball +26C4..26C5 ; Emoji_Presentation # E0.6 [2] (⛄..⛅) snowman without snow..sun behind cloud +26CE ; Emoji_Presentation # E0.6 [1] (⛎) Ophiuchus +26D4 ; Emoji_Presentation # E0.6 [1] (⛔) no entry +26EA ; Emoji_Presentation # E0.6 [1] (⛪) church +26F2..26F3 ; Emoji_Presentation # E0.6 [2] (⛲..⛳) fountain..flag in 
hole +26F5 ; Emoji_Presentation # E0.6 [1] (⛵) sailboat +26FA ; Emoji_Presentation # E0.6 [1] (⛺) tent +26FD ; Emoji_Presentation # E0.6 [1] (⛽) fuel pump +2705 ; Emoji_Presentation # E0.6 [1] (✅) check mark button +270A..270B ; Emoji_Presentation # E0.6 [2] (✊..✋) raised fist..raised hand +2728 ; Emoji_Presentation # E0.6 [1] (✨) sparkles +274C ; Emoji_Presentation # E0.6 [1] (❌) cross mark +274E ; Emoji_Presentation # E0.6 [1] (❎) cross mark button +2753..2755 ; Emoji_Presentation # E0.6 [3] (❓..❕) red question mark..white exclamation mark +2757 ; Emoji_Presentation # E0.6 [1] (❗) red exclamation mark +2795..2797 ; Emoji_Presentation # E0.6 [3] (➕..➗) plus..divide +27B0 ; Emoji_Presentation # E0.6 [1] (➰) curly loop +27BF ; Emoji_Presentation # E1.0 [1] (➿) double curly loop +2B1B..2B1C ; Emoji_Presentation # E0.6 [2] (⬛..⬜) black large square..white large square +2B50 ; Emoji_Presentation # E0.6 [1] (⭐) star +2B55 ; Emoji_Presentation # E0.6 [1] (⭕) hollow red circle +1F004 ; Emoji_Presentation # E0.6 [1] (🀄) mahjong red dragon +1F0CF ; Emoji_Presentation # E0.6 [1] (🃏) joker +1F18E ; Emoji_Presentation # E0.6 [1] (🆎) AB button (blood type) +1F191..1F19A ; Emoji_Presentation # E0.6 [10] (🆑..🆚) CL button..VS button +1F1E6..1F1FF ; Emoji_Presentation # E0.0 [26] (🇦..🇿) regional indicator symbol letter a..regional indicator symbol letter z +1F201 ; Emoji_Presentation # E0.6 [1] (🈁) Japanese “here” button +1F21A ; Emoji_Presentation # E0.6 [1] (🈚) Japanese “free of charge” button +1F22F ; Emoji_Presentation # E0.6 [1] (🈯) Japanese “reserved” button +1F232..1F236 ; Emoji_Presentation # E0.6 [5] (🈲..🈶) Japanese “prohibited” button..Japanese “not free of charge” button +1F238..1F23A ; Emoji_Presentation # E0.6 [3] (🈸..🈺) Japanese “application” button..Japanese “open for business” button +1F250..1F251 ; Emoji_Presentation # E0.6 [2] (🉐..🉑) Japanese “bargain” button..Japanese “acceptable” button +1F300..1F30C ; Emoji_Presentation # E0.6 [13] (🌀..🌌) cyclone..milky way 
+1F30D..1F30E ; Emoji_Presentation # E0.7 [2] (🌍..🌎) globe showing Europe-Africa..globe showing Americas +1F30F ; Emoji_Presentation # E0.6 [1] (🌏) globe showing Asia-Australia +1F310 ; Emoji_Presentation # E1.0 [1] (🌐) globe with meridians +1F311 ; Emoji_Presentation # E0.6 [1] (🌑) new moon +1F312 ; Emoji_Presentation # E1.0 [1] (🌒) waxing crescent moon +1F313..1F315 ; Emoji_Presentation # E0.6 [3] (🌓..🌕) first quarter moon..full moon +1F316..1F318 ; Emoji_Presentation # E1.0 [3] (🌖..🌘) waning gibbous moon..waning crescent moon +1F319 ; Emoji_Presentation # E0.6 [1] (🌙) crescent moon +1F31A ; Emoji_Presentation # E1.0 [1] (🌚) new moon face +1F31B ; Emoji_Presentation # E0.6 [1] (🌛) first quarter moon face +1F31C ; Emoji_Presentation # E0.7 [1] (🌜) last quarter moon face +1F31D..1F31E ; Emoji_Presentation # E1.0 [2] (🌝..🌞) full moon face..sun with face +1F31F..1F320 ; Emoji_Presentation # E0.6 [2] (🌟..🌠) glowing star..shooting star +1F32D..1F32F ; Emoji_Presentation # E1.0 [3] (🌭..🌯) hot dog..burrito +1F330..1F331 ; Emoji_Presentation # E0.6 [2] (🌰..🌱) chestnut..seedling +1F332..1F333 ; Emoji_Presentation # E1.0 [2] (🌲..🌳) evergreen tree..deciduous tree +1F334..1F335 ; Emoji_Presentation # E0.6 [2] (🌴..🌵) palm tree..cactus +1F337..1F34A ; Emoji_Presentation # E0.6 [20] (🌷..🍊) tulip..tangerine +1F34B ; Emoji_Presentation # E1.0 [1] (🍋) lemon +1F34C..1F34F ; Emoji_Presentation # E0.6 [4] (🍌..🍏) banana..green apple +1F350 ; Emoji_Presentation # E1.0 [1] (🍐) pear +1F351..1F37B ; Emoji_Presentation # E0.6 [43] (🍑..🍻) peach..clinking beer mugs +1F37C ; Emoji_Presentation # E1.0 [1] (🍼) baby bottle +1F37E..1F37F ; Emoji_Presentation # E1.0 [2] (🍾..🍿) bottle with popping cork..popcorn +1F380..1F393 ; Emoji_Presentation # E0.6 [20] (🎀..🎓) ribbon..graduation cap +1F3A0..1F3C4 ; Emoji_Presentation # E0.6 [37] (🎠..🏄) carousel horse..person surfing +1F3C5 ; Emoji_Presentation # E1.0 [1] (🏅) sports medal +1F3C6 ; Emoji_Presentation # E0.6 [1] (🏆) trophy +1F3C7 ; 
Emoji_Presentation # E1.0 [1] (🏇) horse racing +1F3C8 ; Emoji_Presentation # E0.6 [1] (🏈) american football +1F3C9 ; Emoji_Presentation # E1.0 [1] (🏉) rugby football +1F3CA ; Emoji_Presentation # E0.6 [1] (🏊) person swimming +1F3CF..1F3D3 ; Emoji_Presentation # E1.0 [5] (🏏..🏓) cricket game..ping pong +1F3E0..1F3E3 ; Emoji_Presentation # E0.6 [4] (🏠..🏣) house..Japanese post office +1F3E4 ; Emoji_Presentation # E1.0 [1] (🏤) post office +1F3E5..1F3F0 ; Emoji_Presentation # E0.6 [12] (🏥..🏰) hospital..castle +1F3F4 ; Emoji_Presentation # E1.0 [1] (🏴) black flag +1F3F8..1F407 ; Emoji_Presentation # E1.0 [16] (🏸..🐇) badminton..rabbit +1F408 ; Emoji_Presentation # E0.7 [1] (🐈) cat +1F409..1F40B ; Emoji_Presentation # E1.0 [3] (🐉..🐋) dragon..whale +1F40C..1F40E ; Emoji_Presentation # E0.6 [3] (🐌..🐎) snail..horse +1F40F..1F410 ; Emoji_Presentation # E1.0 [2] (🐏..🐐) ram..goat +1F411..1F412 ; Emoji_Presentation # E0.6 [2] (🐑..🐒) ewe..monkey +1F413 ; Emoji_Presentation # E1.0 [1] (🐓) rooster +1F414 ; Emoji_Presentation # E0.6 [1] (🐔) chicken +1F415 ; Emoji_Presentation # E0.7 [1] (🐕) dog +1F416 ; Emoji_Presentation # E1.0 [1] (🐖) pig +1F417..1F429 ; Emoji_Presentation # E0.6 [19] (🐗..🐩) boar..poodle +1F42A ; Emoji_Presentation # E1.0 [1] (🐪) camel +1F42B..1F43E ; Emoji_Presentation # E0.6 [20] (🐫..🐾) two-hump camel..paw prints +1F440 ; Emoji_Presentation # E0.6 [1] (👀) eyes +1F442..1F464 ; Emoji_Presentation # E0.6 [35] (👂..👤) ear..bust in silhouette +1F465 ; Emoji_Presentation # E1.0 [1] (👥) busts in silhouette +1F466..1F46B ; Emoji_Presentation # E0.6 [6] (👦..👫) boy..woman and man holding hands +1F46C..1F46D ; Emoji_Presentation # E1.0 [2] (👬..👭) men holding hands..women holding hands +1F46E..1F4AC ; Emoji_Presentation # E0.6 [63] (👮..💬) police officer..speech balloon +1F4AD ; Emoji_Presentation # E1.0 [1] (💭) thought balloon +1F4AE..1F4B5 ; Emoji_Presentation # E0.6 [8] (💮..💵) white flower..dollar banknote +1F4B6..1F4B7 ; Emoji_Presentation # E1.0 [2] (💶..💷) euro 
banknote..pound banknote +1F4B8..1F4EB ; Emoji_Presentation # E0.6 [52] (💸..📫) money with wings..closed mailbox with raised flag +1F4EC..1F4ED ; Emoji_Presentation # E0.7 [2] (📬..📭) open mailbox with raised flag..open mailbox with lowered flag +1F4EE ; Emoji_Presentation # E0.6 [1] (📮) postbox +1F4EF ; Emoji_Presentation # E1.0 [1] (📯) postal horn +1F4F0..1F4F4 ; Emoji_Presentation # E0.6 [5] (📰..📴) newspaper..mobile phone off +1F4F5 ; Emoji_Presentation # E1.0 [1] (📵) no mobile phones +1F4F6..1F4F7 ; Emoji_Presentation # E0.6 [2] (📶..📷) antenna bars..camera +1F4F8 ; Emoji_Presentation # E1.0 [1] (📸) camera with flash +1F4F9..1F4FC ; Emoji_Presentation # E0.6 [4] (📹..📼) video camera..videocassette +1F4FF..1F502 ; Emoji_Presentation # E1.0 [4] (📿..🔂) prayer beads..repeat single button +1F503 ; Emoji_Presentation # E0.6 [1] (🔃) clockwise vertical arrows +1F504..1F507 ; Emoji_Presentation # E1.0 [4] (🔄..🔇) counterclockwise arrows button..muted speaker +1F508 ; Emoji_Presentation # E0.7 [1] (🔈) speaker low volume +1F509 ; Emoji_Presentation # E1.0 [1] (🔉) speaker medium volume +1F50A..1F514 ; Emoji_Presentation # E0.6 [11] (🔊..🔔) speaker high volume..bell +1F515 ; Emoji_Presentation # E1.0 [1] (🔕) bell with slash +1F516..1F52B ; Emoji_Presentation # E0.6 [22] (🔖..🔫) bookmark..water pistol +1F52C..1F52D ; Emoji_Presentation # E1.0 [2] (🔬..🔭) microscope..telescope +1F52E..1F53D ; Emoji_Presentation # E0.6 [16] (🔮..🔽) crystal ball..downwards button +1F54B..1F54E ; Emoji_Presentation # E1.0 [4] (🕋..🕎) kaaba..menorah +1F550..1F55B ; Emoji_Presentation # E0.6 [12] (🕐..🕛) one o’clock..twelve o’clock +1F55C..1F567 ; Emoji_Presentation # E0.7 [12] (🕜..🕧) one-thirty..twelve-thirty +1F57A ; Emoji_Presentation # E3.0 [1] (🕺) man dancing +1F595..1F596 ; Emoji_Presentation # E1.0 [2] (🖕..🖖) middle finger..vulcan salute +1F5A4 ; Emoji_Presentation # E3.0 [1] (🖤) black heart +1F5FB..1F5FF ; Emoji_Presentation # E0.6 [5] (🗻..🗿) mount fuji..moai +1F600 ; Emoji_Presentation # E1.0 [1] 
(😀) grinning face +1F601..1F606 ; Emoji_Presentation # E0.6 [6] (😁..😆) beaming face with smiling eyes..grinning squinting face +1F607..1F608 ; Emoji_Presentation # E1.0 [2] (😇..😈) smiling face with halo..smiling face with horns +1F609..1F60D ; Emoji_Presentation # E0.6 [5] (😉..😍) winking face..smiling face with heart-eyes +1F60E ; Emoji_Presentation # E1.0 [1] (😎) smiling face with sunglasses +1F60F ; Emoji_Presentation # E0.6 [1] (😏) smirking face +1F610 ; Emoji_Presentation # E0.7 [1] (😐) neutral face +1F611 ; Emoji_Presentation # E1.0 [1] (😑) expressionless face +1F612..1F614 ; Emoji_Presentation # E0.6 [3] (😒..😔) unamused face..pensive face +1F615 ; Emoji_Presentation # E1.0 [1] (😕) confused face +1F616 ; Emoji_Presentation # E0.6 [1] (😖) confounded face +1F617 ; Emoji_Presentation # E1.0 [1] (😗) kissing face +1F618 ; Emoji_Presentation # E0.6 [1] (😘) face blowing a kiss +1F619 ; Emoji_Presentation # E1.0 [1] (😙) kissing face with smiling eyes +1F61A ; Emoji_Presentation # E0.6 [1] (😚) kissing face with closed eyes +1F61B ; Emoji_Presentation # E1.0 [1] (😛) face with tongue +1F61C..1F61E ; Emoji_Presentation # E0.6 [3] (😜..😞) winking face with tongue..disappointed face +1F61F ; Emoji_Presentation # E1.0 [1] (😟) worried face +1F620..1F625 ; Emoji_Presentation # E0.6 [6] (😠..😥) angry face..sad but relieved face +1F626..1F627 ; Emoji_Presentation # E1.0 [2] (😦..😧) frowning face with open mouth..anguished face +1F628..1F62B ; Emoji_Presentation # E0.6 [4] (😨..😫) fearful face..tired face +1F62C ; Emoji_Presentation # E1.0 [1] (😬) grimacing face +1F62D ; Emoji_Presentation # E0.6 [1] (😭) loudly crying face +1F62E..1F62F ; Emoji_Presentation # E1.0 [2] (😮..😯) face with open mouth..hushed face +1F630..1F633 ; Emoji_Presentation # E0.6 [4] (😰..😳) anxious face with sweat..flushed face +1F634 ; Emoji_Presentation # E1.0 [1] (😴) sleeping face +1F635 ; Emoji_Presentation # E0.6 [1] (😵) face with crossed-out eyes +1F636 ; Emoji_Presentation # E1.0 [1] (😶) face without mouth 
+1F637..1F640 ; Emoji_Presentation # E0.6 [10] (😷..🙀) face with medical mask..weary cat +1F641..1F644 ; Emoji_Presentation # E1.0 [4] (🙁..🙄) slightly frowning face..face with rolling eyes +1F645..1F64F ; Emoji_Presentation # E0.6 [11] (🙅..🙏) person gesturing NO..folded hands +1F680 ; Emoji_Presentation # E0.6 [1] (🚀) rocket +1F681..1F682 ; Emoji_Presentation # E1.0 [2] (🚁..🚂) helicopter..locomotive +1F683..1F685 ; Emoji_Presentation # E0.6 [3] (🚃..🚅) railway car..bullet train +1F686 ; Emoji_Presentation # E1.0 [1] (🚆) train +1F687 ; Emoji_Presentation # E0.6 [1] (🚇) metro +1F688 ; Emoji_Presentation # E1.0 [1] (🚈) light rail +1F689 ; Emoji_Presentation # E0.6 [1] (🚉) station +1F68A..1F68B ; Emoji_Presentation # E1.0 [2] (🚊..🚋) tram..tram car +1F68C ; Emoji_Presentation # E0.6 [1] (🚌) bus +1F68D ; Emoji_Presentation # E0.7 [1] (🚍) oncoming bus +1F68E ; Emoji_Presentation # E1.0 [1] (🚎) trolleybus +1F68F ; Emoji_Presentation # E0.6 [1] (🚏) bus stop +1F690 ; Emoji_Presentation # E1.0 [1] (🚐) minibus +1F691..1F693 ; Emoji_Presentation # E0.6 [3] (🚑..🚓) ambulance..police car +1F694 ; Emoji_Presentation # E0.7 [1] (🚔) oncoming police car +1F695 ; Emoji_Presentation # E0.6 [1] (🚕) taxi +1F696 ; Emoji_Presentation # E1.0 [1] (🚖) oncoming taxi +1F697 ; Emoji_Presentation # E0.6 [1] (🚗) automobile +1F698 ; Emoji_Presentation # E0.7 [1] (🚘) oncoming automobile +1F699..1F69A ; Emoji_Presentation # E0.6 [2] (🚙..🚚) sport utility vehicle..delivery truck +1F69B..1F6A1 ; Emoji_Presentation # E1.0 [7] (🚛..🚡) articulated lorry..aerial tramway +1F6A2 ; Emoji_Presentation # E0.6 [1] (🚢) ship +1F6A3 ; Emoji_Presentation # E1.0 [1] (🚣) person rowing boat +1F6A4..1F6A5 ; Emoji_Presentation # E0.6 [2] (🚤..🚥) speedboat..horizontal traffic light +1F6A6 ; Emoji_Presentation # E1.0 [1] (🚦) vertical traffic light +1F6A7..1F6AD ; Emoji_Presentation # E0.6 [7] (🚧..🚭) construction..no smoking +1F6AE..1F6B1 ; Emoji_Presentation # E1.0 [4] (🚮..🚱) litter in bin sign..non-potable water +1F6B2 ; 
Emoji_Presentation # E0.6 [1] (🚲) bicycle +1F6B3..1F6B5 ; Emoji_Presentation # E1.0 [3] (🚳..🚵) no bicycles..person mountain biking +1F6B6 ; Emoji_Presentation # E0.6 [1] (🚶) person walking +1F6B7..1F6B8 ; Emoji_Presentation # E1.0 [2] (🚷..🚸) no pedestrians..children crossing +1F6B9..1F6BE ; Emoji_Presentation # E0.6 [6] (🚹..🚾) men’s room..water closet +1F6BF ; Emoji_Presentation # E1.0 [1] (🚿) shower +1F6C0 ; Emoji_Presentation # E0.6 [1] (🛀) person taking bath +1F6C1..1F6C5 ; Emoji_Presentation # E1.0 [5] (🛁..🛅) bathtub..left luggage +1F6CC ; Emoji_Presentation # E1.0 [1] (🛌) person in bed +1F6D0 ; Emoji_Presentation # E1.0 [1] (🛐) place of worship +1F6D1..1F6D2 ; Emoji_Presentation # E3.0 [2] (🛑..🛒) stop sign..shopping cart +1F6D5 ; Emoji_Presentation # E12.0 [1] (🛕) hindu temple +1F6D6..1F6D7 ; Emoji_Presentation # E13.0 [2] (🛖..🛗) hut..elevator +1F6DD..1F6DF ; Emoji_Presentation # E14.0 [3] (🛝..🛟) playground slide..ring buoy +1F6EB..1F6EC ; Emoji_Presentation # E1.0 [2] (🛫..🛬) airplane departure..airplane arrival +1F6F4..1F6F6 ; Emoji_Presentation # E3.0 [3] (🛴..🛶) kick scooter..canoe +1F6F7..1F6F8 ; Emoji_Presentation # E5.0 [2] (🛷..🛸) sled..flying saucer +1F6F9 ; Emoji_Presentation # E11.0 [1] (🛹) skateboard +1F6FA ; Emoji_Presentation # E12.0 [1] (🛺) auto rickshaw +1F6FB..1F6FC ; Emoji_Presentation # E13.0 [2] (🛻..🛼) pickup truck..roller skate +1F7E0..1F7EB ; Emoji_Presentation # E12.0 [12] (🟠..🟫) orange circle..brown square +1F7F0 ; Emoji_Presentation # E14.0 [1] (🟰) heavy equals sign +1F90C ; Emoji_Presentation # E13.0 [1] (🤌) pinched fingers +1F90D..1F90F ; Emoji_Presentation # E12.0 [3] (🤍..🤏) white heart..pinching hand +1F910..1F918 ; Emoji_Presentation # E1.0 [9] (🤐..🤘) zipper-mouth face..sign of the horns +1F919..1F91E ; Emoji_Presentation # E3.0 [6] (🤙..🤞) call me hand..crossed fingers +1F91F ; Emoji_Presentation # E5.0 [1] (🤟) love-you gesture +1F920..1F927 ; Emoji_Presentation # E3.0 [8] (🤠..🤧) cowboy hat face..sneezing face +1F928..1F92F ; 
Emoji_Presentation # E5.0 [8] (🤨..🤯) face with raised eyebrow..exploding head +1F930 ; Emoji_Presentation # E3.0 [1] (🤰) pregnant woman +1F931..1F932 ; Emoji_Presentation # E5.0 [2] (🤱..🤲) breast-feeding..palms up together +1F933..1F93A ; Emoji_Presentation # E3.0 [8] (🤳..🤺) selfie..person fencing +1F93C..1F93E ; Emoji_Presentation # E3.0 [3] (🤼..🤾) people wrestling..person playing handball +1F93F ; Emoji_Presentation # E12.0 [1] (🤿) diving mask +1F940..1F945 ; Emoji_Presentation # E3.0 [6] (🥀..🥅) wilted flower..goal net +1F947..1F94B ; Emoji_Presentation # E3.0 [5] (🥇..🥋) 1st place medal..martial arts uniform +1F94C ; Emoji_Presentation # E5.0 [1] (🥌) curling stone +1F94D..1F94F ; Emoji_Presentation # E11.0 [3] (🥍..🥏) lacrosse..flying disc +1F950..1F95E ; Emoji_Presentation # E3.0 [15] (🥐..🥞) croissant..pancakes +1F95F..1F96B ; Emoji_Presentation # E5.0 [13] (🥟..🥫) dumpling..canned food +1F96C..1F970 ; Emoji_Presentation # E11.0 [5] (🥬..🥰) leafy green..smiling face with hearts +1F971 ; Emoji_Presentation # E12.0 [1] (🥱) yawning face +1F972 ; Emoji_Presentation # E13.0 [1] (🥲) smiling face with tear +1F973..1F976 ; Emoji_Presentation # E11.0 [4] (🥳..🥶) partying face..cold face +1F977..1F978 ; Emoji_Presentation # E13.0 [2] (🥷..🥸) ninja..disguised face +1F979 ; Emoji_Presentation # E14.0 [1] (🥹) face holding back tears +1F97A ; Emoji_Presentation # E11.0 [1] (🥺) pleading face +1F97B ; Emoji_Presentation # E12.0 [1] (🥻) sari +1F97C..1F97F ; Emoji_Presentation # E11.0 [4] (🥼..🥿) lab coat..flat shoe +1F980..1F984 ; Emoji_Presentation # E1.0 [5] (🦀..🦄) crab..unicorn +1F985..1F991 ; Emoji_Presentation # E3.0 [13] (🦅..🦑) eagle..squid +1F992..1F997 ; Emoji_Presentation # E5.0 [6] (🦒..🦗) giraffe..cricket +1F998..1F9A2 ; Emoji_Presentation # E11.0 [11] (🦘..🦢) kangaroo..swan +1F9A3..1F9A4 ; Emoji_Presentation # E13.0 [2] (🦣..🦤) mammoth..dodo +1F9A5..1F9AA ; Emoji_Presentation # E12.0 [6] (🦥..🦪) sloth..oyster +1F9AB..1F9AD ; Emoji_Presentation # E13.0 [3] (🦫..🦭) beaver..seal 
+1F9AE..1F9AF ; Emoji_Presentation # E12.0 [2] (🦮..🦯) guide dog..white cane +1F9B0..1F9B9 ; Emoji_Presentation # E11.0 [10] (🦰..🦹) red hair..supervillain +1F9BA..1F9BF ; Emoji_Presentation # E12.0 [6] (🦺..🦿) safety vest..mechanical leg +1F9C0 ; Emoji_Presentation # E1.0 [1] (🧀) cheese wedge +1F9C1..1F9C2 ; Emoji_Presentation # E11.0 [2] (🧁..🧂) cupcake..salt +1F9C3..1F9CA ; Emoji_Presentation # E12.0 [8] (🧃..🧊) beverage box..ice +1F9CB ; Emoji_Presentation # E13.0 [1] (🧋) bubble tea +1F9CC ; Emoji_Presentation # E14.0 [1] (🧌) troll +1F9CD..1F9CF ; Emoji_Presentation # E12.0 [3] (🧍..🧏) person standing..deaf person +1F9D0..1F9E6 ; Emoji_Presentation # E5.0 [23] (🧐..🧦) face with monocle..socks +1F9E7..1F9FF ; Emoji_Presentation # E11.0 [25] (🧧..🧿) red envelope..nazar amulet +1FA70..1FA73 ; Emoji_Presentation # E12.0 [4] (🩰..🩳) ballet shoes..shorts +1FA74 ; Emoji_Presentation # E13.0 [1] (🩴) thong sandal +1FA78..1FA7A ; Emoji_Presentation # E12.0 [3] (🩸..🩺) drop of blood..stethoscope +1FA7B..1FA7C ; Emoji_Presentation # E14.0 [2] (🩻..🩼) x-ray..crutch +1FA80..1FA82 ; Emoji_Presentation # E12.0 [3] (🪀..🪂) yo-yo..parachute +1FA83..1FA86 ; Emoji_Presentation # E13.0 [4] (🪃..🪆) boomerang..nesting dolls +1FA90..1FA95 ; Emoji_Presentation # E12.0 [6] (🪐..🪕) ringed planet..banjo +1FA96..1FAA8 ; Emoji_Presentation # E13.0 [19] (🪖..🪨) military helmet..rock +1FAA9..1FAAC ; Emoji_Presentation # E14.0 [4] (🪩..🪬) mirror ball..hamsa +1FAB0..1FAB6 ; Emoji_Presentation # E13.0 [7] (🪰..🪶) fly..feather +1FAB7..1FABA ; Emoji_Presentation # E14.0 [4] (🪷..🪺) lotus..nest with eggs +1FAC0..1FAC2 ; Emoji_Presentation # E13.0 [3] (🫀..🫂) anatomical heart..people hugging +1FAC3..1FAC5 ; Emoji_Presentation # E14.0 [3] (🫃..🫅) pregnant man..person with crown +1FAD0..1FAD6 ; Emoji_Presentation # E13.0 [7] (🫐..🫖) blueberries..teapot +1FAD7..1FAD9 ; Emoji_Presentation # E14.0 [3] (🫗..🫙) pouring liquid..jar +1FAE0..1FAE7 ; Emoji_Presentation # E14.0 [8] (🫠..🫧) melting face..bubbles +1FAF0..1FAF6 ; 
Emoji_Presentation # E14.0 [7] (🫰..🫶) hand with index finger and thumb crossed..heart hands + +# Total elements: 1185 + +# ================================================ + +# All omitted code points have Emoji_Modifier=No +# @missing: 0000..10FFFF ; Emoji_Modifier ; No + +1F3FB..1F3FF ; Emoji_Modifier # E1.0 [5] (🏻..🏿) light skin tone..dark skin tone + +# Total elements: 5 + +# ================================================ + +# All omitted code points have Emoji_Modifier_Base=No +# @missing: 0000..10FFFF ; Emoji_Modifier_Base ; No + +261D ; Emoji_Modifier_Base # E0.6 [1] (☝️) index pointing up +26F9 ; Emoji_Modifier_Base # E0.7 [1] (⛹️) person bouncing ball +270A..270C ; Emoji_Modifier_Base # E0.6 [3] (✊..✌️) raised fist..victory hand +270D ; Emoji_Modifier_Base # E0.7 [1] (✍️) writing hand +1F385 ; Emoji_Modifier_Base # E0.6 [1] (🎅) Santa Claus +1F3C2..1F3C4 ; Emoji_Modifier_Base # E0.6 [3] (🏂..🏄) snowboarder..person surfing +1F3C7 ; Emoji_Modifier_Base # E1.0 [1] (🏇) horse racing +1F3CA ; Emoji_Modifier_Base # E0.6 [1] (🏊) person swimming +1F3CB..1F3CC ; Emoji_Modifier_Base # E0.7 [2] (🏋️..🏌️) person lifting weights..person golfing +1F442..1F443 ; Emoji_Modifier_Base # E0.6 [2] (👂..👃) ear..nose +1F446..1F450 ; Emoji_Modifier_Base # E0.6 [11] (👆..👐) backhand index pointing up..open hands +1F466..1F46B ; Emoji_Modifier_Base # E0.6 [6] (👦..👫) boy..woman and man holding hands +1F46C..1F46D ; Emoji_Modifier_Base # E1.0 [2] (👬..👭) men holding hands..women holding hands +1F46E..1F478 ; Emoji_Modifier_Base # E0.6 [11] (👮..👸) police officer..princess +1F47C ; Emoji_Modifier_Base # E0.6 [1] (👼) baby angel +1F481..1F483 ; Emoji_Modifier_Base # E0.6 [3] (💁..💃) person tipping hand..woman dancing +1F485..1F487 ; Emoji_Modifier_Base # E0.6 [3] (💅..💇) nail polish..person getting haircut +1F48F ; Emoji_Modifier_Base # E0.6 [1] (💏) kiss +1F491 ; Emoji_Modifier_Base # E0.6 [1] (💑) couple with heart +1F4AA ; Emoji_Modifier_Base # E0.6 [1] (💪) flexed biceps +1F574..1F575 ; 
Emoji_Modifier_Base # E0.7 [2] (🕴️..🕵️) person in suit levitating..detective +1F57A ; Emoji_Modifier_Base # E3.0 [1] (🕺) man dancing +1F590 ; Emoji_Modifier_Base # E0.7 [1] (🖐️) hand with fingers splayed +1F595..1F596 ; Emoji_Modifier_Base # E1.0 [2] (🖕..🖖) middle finger..vulcan salute +1F645..1F647 ; Emoji_Modifier_Base # E0.6 [3] (🙅..🙇) person gesturing NO..person bowing +1F64B..1F64F ; Emoji_Modifier_Base # E0.6 [5] (🙋..🙏) person raising hand..folded hands +1F6A3 ; Emoji_Modifier_Base # E1.0 [1] (🚣) person rowing boat +1F6B4..1F6B5 ; Emoji_Modifier_Base # E1.0 [2] (🚴..🚵) person biking..person mountain biking +1F6B6 ; Emoji_Modifier_Base # E0.6 [1] (🚶) person walking +1F6C0 ; Emoji_Modifier_Base # E0.6 [1] (🛀) person taking bath +1F6CC ; Emoji_Modifier_Base # E1.0 [1] (🛌) person in bed +1F90C ; Emoji_Modifier_Base # E13.0 [1] (🤌) pinched fingers +1F90F ; Emoji_Modifier_Base # E12.0 [1] (🤏) pinching hand +1F918 ; Emoji_Modifier_Base # E1.0 [1] (🤘) sign of the horns +1F919..1F91E ; Emoji_Modifier_Base # E3.0 [6] (🤙..🤞) call me hand..crossed fingers +1F91F ; Emoji_Modifier_Base # E5.0 [1] (🤟) love-you gesture +1F926 ; Emoji_Modifier_Base # E3.0 [1] (🤦) person facepalming +1F930 ; Emoji_Modifier_Base # E3.0 [1] (🤰) pregnant woman +1F931..1F932 ; Emoji_Modifier_Base # E5.0 [2] (🤱..🤲) breast-feeding..palms up together +1F933..1F939 ; Emoji_Modifier_Base # E3.0 [7] (🤳..🤹) selfie..person juggling +1F93C..1F93E ; Emoji_Modifier_Base # E3.0 [3] (🤼..🤾) people wrestling..person playing handball +1F977 ; Emoji_Modifier_Base # E13.0 [1] (🥷) ninja +1F9B5..1F9B6 ; Emoji_Modifier_Base # E11.0 [2] (🦵..🦶) leg..foot +1F9B8..1F9B9 ; Emoji_Modifier_Base # E11.0 [2] (🦸..🦹) superhero..supervillain +1F9BB ; Emoji_Modifier_Base # E12.0 [1] (🦻) ear with hearing aid +1F9CD..1F9CF ; Emoji_Modifier_Base # E12.0 [3] (🧍..🧏) person standing..deaf person +1F9D1..1F9DD ; Emoji_Modifier_Base # E5.0 [13] (🧑..🧝) person..elf +1FAC3..1FAC5 ; Emoji_Modifier_Base # E14.0 [3] (🫃..🫅) pregnant man..person 
with crown +1FAF0..1FAF6 ; Emoji_Modifier_Base # E14.0 [7] (🫰..🫶) hand with index finger and thumb crossed..heart hands + +# Total elements: 132 + +# ================================================ + +# All omitted code points have Emoji_Component=No +# @missing: 0000..10FFFF ; Emoji_Component ; No + +0023 ; Emoji_Component # E0.0 [1] (#️) hash sign +002A ; Emoji_Component # E0.0 [1] (*️) asterisk +0030..0039 ; Emoji_Component # E0.0 [10] (0️..9️) digit zero..digit nine +200D ; Emoji_Component # E0.0 [1] (‍) zero width joiner +20E3 ; Emoji_Component # E0.0 [1] (⃣) combining enclosing keycap +FE0F ; Emoji_Component # E0.0 [1] () VARIATION SELECTOR-16 +1F1E6..1F1FF ; Emoji_Component # E0.0 [26] (🇦..🇿) regional indicator symbol letter a..regional indicator symbol letter z +1F3FB..1F3FF ; Emoji_Component # E1.0 [5] (🏻..🏿) light skin tone..dark skin tone +1F9B0..1F9B3 ; Emoji_Component # E11.0 [4] (🦰..🦳) red hair..white hair +E0020..E007F ; Emoji_Component # E0.0 [96] (󠀠..󠁿) tag space..cancel tag + +# Total elements: 146 + +# ================================================ + +# All omitted code points have Extended_Pictographic=No +# @missing: 0000..10FFFF ; Extended_Pictographic ; No + +00A9 ; Extended_Pictographic# E0.6 [1] (©️) copyright +00AE ; Extended_Pictographic# E0.6 [1] (®️) registered +203C ; Extended_Pictographic# E0.6 [1] (‼️) double exclamation mark +2049 ; Extended_Pictographic# E0.6 [1] (⁉️) exclamation question mark +2122 ; Extended_Pictographic# E0.6 [1] (™️) trade mark +2139 ; Extended_Pictographic# E0.6 [1] (ℹ️) information +2194..2199 ; Extended_Pictographic# E0.6 [6] (↔️..↙️) left-right arrow..down-left arrow +21A9..21AA ; Extended_Pictographic# E0.6 [2] (↩️..↪️) right arrow curving left..left arrow curving right +231A..231B ; Extended_Pictographic# E0.6 [2] (⌚..⌛) watch..hourglass done +2328 ; Extended_Pictographic# E1.0 [1] (⌨️) keyboard +2388 ; Extended_Pictographic# E0.0 [1] (⎈) HELM SYMBOL +23CF ; Extended_Pictographic# E1.0 [1] (⏏️) eject 
button +23E9..23EC ; Extended_Pictographic# E0.6 [4] (⏩..⏬) fast-forward button..fast down button +23ED..23EE ; Extended_Pictographic# E0.7 [2] (⏭️..⏮️) next track button..last track button +23EF ; Extended_Pictographic# E1.0 [1] (⏯️) play or pause button +23F0 ; Extended_Pictographic# E0.6 [1] (⏰) alarm clock +23F1..23F2 ; Extended_Pictographic# E1.0 [2] (⏱️..⏲️) stopwatch..timer clock +23F3 ; Extended_Pictographic# E0.6 [1] (⏳) hourglass not done +23F8..23FA ; Extended_Pictographic# E0.7 [3] (⏸️..⏺️) pause button..record button +24C2 ; Extended_Pictographic# E0.6 [1] (Ⓜ️) circled M +25AA..25AB ; Extended_Pictographic# E0.6 [2] (▪️..▫️) black small square..white small square +25B6 ; Extended_Pictographic# E0.6 [1] (▶️) play button +25C0 ; Extended_Pictographic# E0.6 [1] (◀️) reverse button +25FB..25FE ; Extended_Pictographic# E0.6 [4] (◻️..◾) white medium square..black medium-small square +2600..2601 ; Extended_Pictographic# E0.6 [2] (☀️..☁️) sun..cloud +2602..2603 ; Extended_Pictographic# E0.7 [2] (☂️..☃️) umbrella..snowman +2604 ; Extended_Pictographic# E1.0 [1] (☄️) comet +2605 ; Extended_Pictographic# E0.0 [1] (★) BLACK STAR +2607..260D ; Extended_Pictographic# E0.0 [7] (☇..☍) LIGHTNING..OPPOSITION +260E ; Extended_Pictographic# E0.6 [1] (☎️) telephone +260F..2610 ; Extended_Pictographic# E0.0 [2] (☏..☐) WHITE TELEPHONE..BALLOT BOX +2611 ; Extended_Pictographic# E0.6 [1] (☑️) check box with check +2612 ; Extended_Pictographic# E0.0 [1] (☒) BALLOT BOX WITH X +2614..2615 ; Extended_Pictographic# E0.6 [2] (☔..☕) umbrella with rain drops..hot beverage +2616..2617 ; Extended_Pictographic# E0.0 [2] (☖..☗) WHITE SHOGI PIECE..BLACK SHOGI PIECE +2618 ; Extended_Pictographic# E1.0 [1] (☘️) shamrock +2619..261C ; Extended_Pictographic# E0.0 [4] (☙..☜) REVERSED ROTATED FLORAL HEART BULLET..WHITE LEFT POINTING INDEX +261D ; Extended_Pictographic# E0.6 [1] (☝️) index pointing up +261E..261F ; Extended_Pictographic# E0.0 [2] (☞..☟) WHITE RIGHT POINTING INDEX..WHITE DOWN 
POINTING INDEX +2620 ; Extended_Pictographic# E1.0 [1] (☠️) skull and crossbones +2621 ; Extended_Pictographic# E0.0 [1] (☡) CAUTION SIGN +2622..2623 ; Extended_Pictographic# E1.0 [2] (☢️..☣️) radioactive..biohazard +2624..2625 ; Extended_Pictographic# E0.0 [2] (☤..☥) CADUCEUS..ANKH +2626 ; Extended_Pictographic# E1.0 [1] (☦️) orthodox cross +2627..2629 ; Extended_Pictographic# E0.0 [3] (☧..☩) CHI RHO..CROSS OF JERUSALEM +262A ; Extended_Pictographic# E0.7 [1] (☪️) star and crescent +262B..262D ; Extended_Pictographic# E0.0 [3] (☫..☭) FARSI SYMBOL..HAMMER AND SICKLE +262E ; Extended_Pictographic# E1.0 [1] (☮️) peace symbol +262F ; Extended_Pictographic# E0.7 [1] (☯️) yin yang +2630..2637 ; Extended_Pictographic# E0.0 [8] (☰..☷) TRIGRAM FOR HEAVEN..TRIGRAM FOR EARTH +2638..2639 ; Extended_Pictographic# E0.7 [2] (☸️..☹️) wheel of dharma..frowning face +263A ; Extended_Pictographic# E0.6 [1] (☺️) smiling face +263B..263F ; Extended_Pictographic# E0.0 [5] (☻..☿) BLACK SMILING FACE..MERCURY +2640 ; Extended_Pictographic# E4.0 [1] (♀️) female sign +2641 ; Extended_Pictographic# E0.0 [1] (♁) EARTH +2642 ; Extended_Pictographic# E4.0 [1] (♂️) male sign +2643..2647 ; Extended_Pictographic# E0.0 [5] (♃..♇) JUPITER..PLUTO +2648..2653 ; Extended_Pictographic# E0.6 [12] (♈..♓) Aries..Pisces +2654..265E ; Extended_Pictographic# E0.0 [11] (♔..♞) WHITE CHESS KING..BLACK CHESS KNIGHT +265F ; Extended_Pictographic# E11.0 [1] (♟️) chess pawn +2660 ; Extended_Pictographic# E0.6 [1] (♠️) spade suit +2661..2662 ; Extended_Pictographic# E0.0 [2] (♡..♢) WHITE HEART SUIT..WHITE DIAMOND SUIT +2663 ; Extended_Pictographic# E0.6 [1] (♣️) club suit +2664 ; Extended_Pictographic# E0.0 [1] (♤) WHITE SPADE SUIT +2665..2666 ; Extended_Pictographic# E0.6 [2] (♥️..♦️) heart suit..diamond suit +2667 ; Extended_Pictographic# E0.0 [1] (♧) WHITE CLUB SUIT +2668 ; Extended_Pictographic# E0.6 [1] (♨️) hot springs +2669..267A ; Extended_Pictographic# E0.0 [18] (♩..♺) QUARTER NOTE..RECYCLING SYMBOL FOR 
GENERIC MATERIALS +267B ; Extended_Pictographic# E0.6 [1] (♻️) recycling symbol +267C..267D ; Extended_Pictographic# E0.0 [2] (♼..♽) RECYCLED PAPER SYMBOL..PARTIALLY-RECYCLED PAPER SYMBOL +267E ; Extended_Pictographic# E11.0 [1] (♾️) infinity +267F ; Extended_Pictographic# E0.6 [1] (♿) wheelchair symbol +2680..2685 ; Extended_Pictographic# E0.0 [6] (⚀..⚅) DIE FACE-1..DIE FACE-6 +2690..2691 ; Extended_Pictographic# E0.0 [2] (⚐..⚑) WHITE FLAG..BLACK FLAG +2692 ; Extended_Pictographic# E1.0 [1] (⚒️) hammer and pick +2693 ; Extended_Pictographic# E0.6 [1] (⚓) anchor +2694 ; Extended_Pictographic# E1.0 [1] (⚔️) crossed swords +2695 ; Extended_Pictographic# E4.0 [1] (⚕️) medical symbol +2696..2697 ; Extended_Pictographic# E1.0 [2] (⚖️..⚗️) balance scale..alembic +2698 ; Extended_Pictographic# E0.0 [1] (⚘) FLOWER +2699 ; Extended_Pictographic# E1.0 [1] (⚙️) gear +269A ; Extended_Pictographic# E0.0 [1] (⚚) STAFF OF HERMES +269B..269C ; Extended_Pictographic# E1.0 [2] (⚛️..⚜️) atom symbol..fleur-de-lis +269D..269F ; Extended_Pictographic# E0.0 [3] (⚝..⚟) OUTLINED WHITE STAR..THREE LINES CONVERGING LEFT +26A0..26A1 ; Extended_Pictographic# E0.6 [2] (⚠️..⚡) warning..high voltage +26A2..26A6 ; Extended_Pictographic# E0.0 [5] (⚢..⚦) DOUBLED FEMALE SIGN..MALE WITH STROKE SIGN +26A7 ; Extended_Pictographic# E13.0 [1] (⚧️) transgender symbol +26A8..26A9 ; Extended_Pictographic# E0.0 [2] (⚨..⚩) VERTICAL MALE WITH STROKE SIGN..HORIZONTAL MALE WITH STROKE SIGN +26AA..26AB ; Extended_Pictographic# E0.6 [2] (⚪..⚫) white circle..black circle +26AC..26AF ; Extended_Pictographic# E0.0 [4] (⚬..⚯) MEDIUM SMALL WHITE CIRCLE..UNMARRIED PARTNERSHIP SYMBOL +26B0..26B1 ; Extended_Pictographic# E1.0 [2] (⚰️..⚱️) coffin..funeral urn +26B2..26BC ; Extended_Pictographic# E0.0 [11] (⚲..⚼) NEUTER..SESQUIQUADRATE +26BD..26BE ; Extended_Pictographic# E0.6 [2] (⚽..⚾) soccer ball..baseball +26BF..26C3 ; Extended_Pictographic# E0.0 [5] (⚿..⛃) SQUARED KEY..BLACK DRAUGHTS KING +26C4..26C5 ; 
Extended_Pictographic# E0.6 [2] (⛄..⛅) snowman without snow..sun behind cloud +26C6..26C7 ; Extended_Pictographic# E0.0 [2] (⛆..⛇) RAIN..BLACK SNOWMAN +26C8 ; Extended_Pictographic# E0.7 [1] (⛈️) cloud with lightning and rain +26C9..26CD ; Extended_Pictographic# E0.0 [5] (⛉..⛍) TURNED WHITE SHOGI PIECE..DISABLED CAR +26CE ; Extended_Pictographic# E0.6 [1] (⛎) Ophiuchus +26CF ; Extended_Pictographic# E0.7 [1] (⛏️) pick +26D0 ; Extended_Pictographic# E0.0 [1] (⛐) CAR SLIDING +26D1 ; Extended_Pictographic# E0.7 [1] (⛑️) rescue worker’s helmet +26D2 ; Extended_Pictographic# E0.0 [1] (⛒) CIRCLED CROSSING LANES +26D3 ; Extended_Pictographic# E0.7 [1] (⛓️) chains +26D4 ; Extended_Pictographic# E0.6 [1] (⛔) no entry +26D5..26E8 ; Extended_Pictographic# E0.0 [20] (⛕..⛨) ALTERNATE ONE-WAY LEFT WAY TRAFFIC..BLACK CROSS ON SHIELD +26E9 ; Extended_Pictographic# E0.7 [1] (⛩️) shinto shrine +26EA ; Extended_Pictographic# E0.6 [1] (⛪) church +26EB..26EF ; Extended_Pictographic# E0.0 [5] (⛫..⛯) CASTLE..MAP SYMBOL FOR LIGHTHOUSE +26F0..26F1 ; Extended_Pictographic# E0.7 [2] (⛰️..⛱️) mountain..umbrella on ground +26F2..26F3 ; Extended_Pictographic# E0.6 [2] (⛲..⛳) fountain..flag in hole +26F4 ; Extended_Pictographic# E0.7 [1] (⛴️) ferry +26F5 ; Extended_Pictographic# E0.6 [1] (⛵) sailboat +26F6 ; Extended_Pictographic# E0.0 [1] (⛶) SQUARE FOUR CORNERS +26F7..26F9 ; Extended_Pictographic# E0.7 [3] (⛷️..⛹️) skier..person bouncing ball +26FA ; Extended_Pictographic# E0.6 [1] (⛺) tent +26FB..26FC ; Extended_Pictographic# E0.0 [2] (⛻..⛼) JAPANESE BANK SYMBOL..HEADSTONE GRAVEYARD SYMBOL +26FD ; Extended_Pictographic# E0.6 [1] (⛽) fuel pump +26FE..2701 ; Extended_Pictographic# E0.0 [4] (⛾..✁) CUP ON BLACK SQUARE..UPPER BLADE SCISSORS +2702 ; Extended_Pictographic# E0.6 [1] (✂️) scissors +2703..2704 ; Extended_Pictographic# E0.0 [2] (✃..✄) LOWER BLADE SCISSORS..WHITE SCISSORS +2705 ; Extended_Pictographic# E0.6 [1] (✅) check mark button +2708..270C ; Extended_Pictographic# E0.6 [5] (✈️..✌️) 
airplane..victory hand +270D ; Extended_Pictographic# E0.7 [1] (✍️) writing hand +270E ; Extended_Pictographic# E0.0 [1] (✎) LOWER RIGHT PENCIL +270F ; Extended_Pictographic# E0.6 [1] (✏️) pencil +2710..2711 ; Extended_Pictographic# E0.0 [2] (✐..✑) UPPER RIGHT PENCIL..WHITE NIB +2712 ; Extended_Pictographic# E0.6 [1] (✒️) black nib +2714 ; Extended_Pictographic# E0.6 [1] (✔️) check mark +2716 ; Extended_Pictographic# E0.6 [1] (✖️) multiply +271D ; Extended_Pictographic# E0.7 [1] (✝️) latin cross +2721 ; Extended_Pictographic# E0.7 [1] (✡️) star of David +2728 ; Extended_Pictographic# E0.6 [1] (✨) sparkles +2733..2734 ; Extended_Pictographic# E0.6 [2] (✳️..✴️) eight-spoked asterisk..eight-pointed star +2744 ; Extended_Pictographic# E0.6 [1] (❄️) snowflake +2747 ; Extended_Pictographic# E0.6 [1] (❇️) sparkle +274C ; Extended_Pictographic# E0.6 [1] (❌) cross mark +274E ; Extended_Pictographic# E0.6 [1] (❎) cross mark button +2753..2755 ; Extended_Pictographic# E0.6 [3] (❓..❕) red question mark..white exclamation mark +2757 ; Extended_Pictographic# E0.6 [1] (❗) red exclamation mark +2763 ; Extended_Pictographic# E1.0 [1] (❣️) heart exclamation +2764 ; Extended_Pictographic# E0.6 [1] (❤️) red heart +2765..2767 ; Extended_Pictographic# E0.0 [3] (❥..❧) ROTATED HEAVY BLACK HEART BULLET..ROTATED FLORAL HEART BULLET +2795..2797 ; Extended_Pictographic# E0.6 [3] (➕..➗) plus..divide +27A1 ; Extended_Pictographic# E0.6 [1] (➡️) right arrow +27B0 ; Extended_Pictographic# E0.6 [1] (➰) curly loop +27BF ; Extended_Pictographic# E1.0 [1] (➿) double curly loop +2934..2935 ; Extended_Pictographic# E0.6 [2] (⤴️..⤵️) right arrow curving up..right arrow curving down +2B05..2B07 ; Extended_Pictographic# E0.6 [3] (⬅️..⬇️) left arrow..down arrow +2B1B..2B1C ; Extended_Pictographic# E0.6 [2] (⬛..⬜) black large square..white large square +2B50 ; Extended_Pictographic# E0.6 [1] (⭐) star +2B55 ; Extended_Pictographic# E0.6 [1] (⭕) hollow red circle +3030 ; Extended_Pictographic# E0.6 [1] (〰️) 
wavy dash +303D ; Extended_Pictographic# E0.6 [1] (〽️) part alternation mark +3297 ; Extended_Pictographic# E0.6 [1] (㊗️) Japanese “congratulations” button +3299 ; Extended_Pictographic# E0.6 [1] (㊙️) Japanese “secret” button +1F000..1F003 ; Extended_Pictographic# E0.0 [4] (🀀..🀃) MAHJONG TILE EAST WIND..MAHJONG TILE NORTH WIND +1F004 ; Extended_Pictographic# E0.6 [1] (🀄) mahjong red dragon +1F005..1F0CE ; Extended_Pictographic# E0.0 [202] (🀅..🃎) MAHJONG TILE GREEN DRAGON..PLAYING CARD KING OF DIAMONDS +1F0CF ; Extended_Pictographic# E0.6 [1] (🃏) joker +1F0D0..1F0FF ; Extended_Pictographic# E0.0 [48] (🃐..🃿) .. +1F10D..1F10F ; Extended_Pictographic# E0.0 [3] (🄍..🄏) CIRCLED ZERO WITH SLASH..CIRCLED DOLLAR SIGN WITH OVERLAID BACKSLASH +1F12F ; Extended_Pictographic# E0.0 [1] (🄯) COPYLEFT SYMBOL +1F16C..1F16F ; Extended_Pictographic# E0.0 [4] (🅬..🅯) RAISED MR SIGN..CIRCLED HUMAN FIGURE +1F170..1F171 ; Extended_Pictographic# E0.6 [2] (🅰️..🅱️) A button (blood type)..B button (blood type) +1F17E..1F17F ; Extended_Pictographic# E0.6 [2] (🅾️..🅿️) O button (blood type)..P button +1F18E ; Extended_Pictographic# E0.6 [1] (🆎) AB button (blood type) +1F191..1F19A ; Extended_Pictographic# E0.6 [10] (🆑..🆚) CL button..VS button +1F1AD..1F1E5 ; Extended_Pictographic# E0.0 [57] (🆭..🇥) MASK WORK SYMBOL.. +1F201..1F202 ; Extended_Pictographic# E0.6 [2] (🈁..🈂️) Japanese “here” button..Japanese “service charge” button +1F203..1F20F ; Extended_Pictographic# E0.0 [13] (🈃..🈏) .. +1F21A ; Extended_Pictographic# E0.6 [1] (🈚) Japanese “free of charge” button +1F22F ; Extended_Pictographic# E0.6 [1] (🈯) Japanese “reserved” button +1F232..1F23A ; Extended_Pictographic# E0.6 [9] (🈲..🈺) Japanese “prohibited” button..Japanese “open for business” button +1F23C..1F23F ; Extended_Pictographic# E0.0 [4] (🈼..🈿) .. +1F249..1F24F ; Extended_Pictographic# E0.0 [7] (🉉..🉏) .. 
+1F250..1F251 ; Extended_Pictographic# E0.6 [2] (🉐..🉑) Japanese “bargain” button..Japanese “acceptable” button +1F252..1F2FF ; Extended_Pictographic# E0.0 [174] (🉒..🋿) .. +1F300..1F30C ; Extended_Pictographic# E0.6 [13] (🌀..🌌) cyclone..milky way +1F30D..1F30E ; Extended_Pictographic# E0.7 [2] (🌍..🌎) globe showing Europe-Africa..globe showing Americas +1F30F ; Extended_Pictographic# E0.6 [1] (🌏) globe showing Asia-Australia +1F310 ; Extended_Pictographic# E1.0 [1] (🌐) globe with meridians +1F311 ; Extended_Pictographic# E0.6 [1] (🌑) new moon +1F312 ; Extended_Pictographic# E1.0 [1] (🌒) waxing crescent moon +1F313..1F315 ; Extended_Pictographic# E0.6 [3] (🌓..🌕) first quarter moon..full moon +1F316..1F318 ; Extended_Pictographic# E1.0 [3] (🌖..🌘) waning gibbous moon..waning crescent moon +1F319 ; Extended_Pictographic# E0.6 [1] (🌙) crescent moon +1F31A ; Extended_Pictographic# E1.0 [1] (🌚) new moon face +1F31B ; Extended_Pictographic# E0.6 [1] (🌛) first quarter moon face +1F31C ; Extended_Pictographic# E0.7 [1] (🌜) last quarter moon face +1F31D..1F31E ; Extended_Pictographic# E1.0 [2] (🌝..🌞) full moon face..sun with face +1F31F..1F320 ; Extended_Pictographic# E0.6 [2] (🌟..🌠) glowing star..shooting star +1F321 ; Extended_Pictographic# E0.7 [1] (🌡️) thermometer +1F322..1F323 ; Extended_Pictographic# E0.0 [2] (🌢..🌣) BLACK DROPLET..WHITE SUN +1F324..1F32C ; Extended_Pictographic# E0.7 [9] (🌤️..🌬️) sun behind small cloud..wind face +1F32D..1F32F ; Extended_Pictographic# E1.0 [3] (🌭..🌯) hot dog..burrito +1F330..1F331 ; Extended_Pictographic# E0.6 [2] (🌰..🌱) chestnut..seedling +1F332..1F333 ; Extended_Pictographic# E1.0 [2] (🌲..🌳) evergreen tree..deciduous tree +1F334..1F335 ; Extended_Pictographic# E0.6 [2] (🌴..🌵) palm tree..cactus +1F336 ; Extended_Pictographic# E0.7 [1] (🌶️) hot pepper +1F337..1F34A ; Extended_Pictographic# E0.6 [20] (🌷..🍊) tulip..tangerine +1F34B ; Extended_Pictographic# E1.0 [1] (🍋) lemon +1F34C..1F34F ; Extended_Pictographic# E0.6 [4] (🍌..🍏) 
banana..green apple +1F350 ; Extended_Pictographic# E1.0 [1] (🍐) pear +1F351..1F37B ; Extended_Pictographic# E0.6 [43] (🍑..🍻) peach..clinking beer mugs +1F37C ; Extended_Pictographic# E1.0 [1] (🍼) baby bottle +1F37D ; Extended_Pictographic# E0.7 [1] (🍽️) fork and knife with plate +1F37E..1F37F ; Extended_Pictographic# E1.0 [2] (🍾..🍿) bottle with popping cork..popcorn +1F380..1F393 ; Extended_Pictographic# E0.6 [20] (🎀..🎓) ribbon..graduation cap +1F394..1F395 ; Extended_Pictographic# E0.0 [2] (🎔..🎕) HEART WITH TIP ON THE LEFT..BOUQUET OF FLOWERS +1F396..1F397 ; Extended_Pictographic# E0.7 [2] (🎖️..🎗️) military medal..reminder ribbon +1F398 ; Extended_Pictographic# E0.0 [1] (🎘) MUSICAL KEYBOARD WITH JACKS +1F399..1F39B ; Extended_Pictographic# E0.7 [3] (🎙️..🎛️) studio microphone..control knobs +1F39C..1F39D ; Extended_Pictographic# E0.0 [2] (🎜..🎝) BEAMED ASCENDING MUSICAL NOTES..BEAMED DESCENDING MUSICAL NOTES +1F39E..1F39F ; Extended_Pictographic# E0.7 [2] (🎞️..🎟️) film frames..admission tickets +1F3A0..1F3C4 ; Extended_Pictographic# E0.6 [37] (🎠..🏄) carousel horse..person surfing +1F3C5 ; Extended_Pictographic# E1.0 [1] (🏅) sports medal +1F3C6 ; Extended_Pictographic# E0.6 [1] (🏆) trophy +1F3C7 ; Extended_Pictographic# E1.0 [1] (🏇) horse racing +1F3C8 ; Extended_Pictographic# E0.6 [1] (🏈) american football +1F3C9 ; Extended_Pictographic# E1.0 [1] (🏉) rugby football +1F3CA ; Extended_Pictographic# E0.6 [1] (🏊) person swimming +1F3CB..1F3CE ; Extended_Pictographic# E0.7 [4] (🏋️..🏎️) person lifting weights..racing car +1F3CF..1F3D3 ; Extended_Pictographic# E1.0 [5] (🏏..🏓) cricket game..ping pong +1F3D4..1F3DF ; Extended_Pictographic# E0.7 [12] (🏔️..🏟️) snow-capped mountain..stadium +1F3E0..1F3E3 ; Extended_Pictographic# E0.6 [4] (🏠..🏣) house..Japanese post office +1F3E4 ; Extended_Pictographic# E1.0 [1] (🏤) post office +1F3E5..1F3F0 ; Extended_Pictographic# E0.6 [12] (🏥..🏰) hospital..castle +1F3F1..1F3F2 ; Extended_Pictographic# E0.0 [2] (🏱..🏲) WHITE PENNANT..BLACK 
PENNANT +1F3F3 ; Extended_Pictographic# E0.7 [1] (🏳️) white flag +1F3F4 ; Extended_Pictographic# E1.0 [1] (🏴) black flag +1F3F5 ; Extended_Pictographic# E0.7 [1] (🏵️) rosette +1F3F6 ; Extended_Pictographic# E0.0 [1] (🏶) BLACK ROSETTE +1F3F7 ; Extended_Pictographic# E0.7 [1] (🏷️) label +1F3F8..1F3FA ; Extended_Pictographic# E1.0 [3] (🏸..🏺) badminton..amphora +1F400..1F407 ; Extended_Pictographic# E1.0 [8] (🐀..🐇) rat..rabbit +1F408 ; Extended_Pictographic# E0.7 [1] (🐈) cat +1F409..1F40B ; Extended_Pictographic# E1.0 [3] (🐉..🐋) dragon..whale +1F40C..1F40E ; Extended_Pictographic# E0.6 [3] (🐌..🐎) snail..horse +1F40F..1F410 ; Extended_Pictographic# E1.0 [2] (🐏..🐐) ram..goat +1F411..1F412 ; Extended_Pictographic# E0.6 [2] (🐑..🐒) ewe..monkey +1F413 ; Extended_Pictographic# E1.0 [1] (🐓) rooster +1F414 ; Extended_Pictographic# E0.6 [1] (🐔) chicken +1F415 ; Extended_Pictographic# E0.7 [1] (🐕) dog +1F416 ; Extended_Pictographic# E1.0 [1] (🐖) pig +1F417..1F429 ; Extended_Pictographic# E0.6 [19] (🐗..🐩) boar..poodle +1F42A ; Extended_Pictographic# E1.0 [1] (🐪) camel +1F42B..1F43E ; Extended_Pictographic# E0.6 [20] (🐫..🐾) two-hump camel..paw prints +1F43F ; Extended_Pictographic# E0.7 [1] (🐿️) chipmunk +1F440 ; Extended_Pictographic# E0.6 [1] (👀) eyes +1F441 ; Extended_Pictographic# E0.7 [1] (👁️) eye +1F442..1F464 ; Extended_Pictographic# E0.6 [35] (👂..👤) ear..bust in silhouette +1F465 ; Extended_Pictographic# E1.0 [1] (👥) busts in silhouette +1F466..1F46B ; Extended_Pictographic# E0.6 [6] (👦..👫) boy..woman and man holding hands +1F46C..1F46D ; Extended_Pictographic# E1.0 [2] (👬..👭) men holding hands..women holding hands +1F46E..1F4AC ; Extended_Pictographic# E0.6 [63] (👮..💬) police officer..speech balloon +1F4AD ; Extended_Pictographic# E1.0 [1] (💭) thought balloon +1F4AE..1F4B5 ; Extended_Pictographic# E0.6 [8] (💮..💵) white flower..dollar banknote +1F4B6..1F4B7 ; Extended_Pictographic# E1.0 [2] (💶..💷) euro banknote..pound banknote +1F4B8..1F4EB ; Extended_Pictographic# E0.6 
[52] (💸..📫) money with wings..closed mailbox with raised flag +1F4EC..1F4ED ; Extended_Pictographic# E0.7 [2] (📬..📭) open mailbox with raised flag..open mailbox with lowered flag +1F4EE ; Extended_Pictographic# E0.6 [1] (📮) postbox +1F4EF ; Extended_Pictographic# E1.0 [1] (📯) postal horn +1F4F0..1F4F4 ; Extended_Pictographic# E0.6 [5] (📰..📴) newspaper..mobile phone off +1F4F5 ; Extended_Pictographic# E1.0 [1] (📵) no mobile phones +1F4F6..1F4F7 ; Extended_Pictographic# E0.6 [2] (📶..📷) antenna bars..camera +1F4F8 ; Extended_Pictographic# E1.0 [1] (📸) camera with flash +1F4F9..1F4FC ; Extended_Pictographic# E0.6 [4] (📹..📼) video camera..videocassette +1F4FD ; Extended_Pictographic# E0.7 [1] (📽️) film projector +1F4FE ; Extended_Pictographic# E0.0 [1] (📾) PORTABLE STEREO +1F4FF..1F502 ; Extended_Pictographic# E1.0 [4] (📿..🔂) prayer beads..repeat single button +1F503 ; Extended_Pictographic# E0.6 [1] (🔃) clockwise vertical arrows +1F504..1F507 ; Extended_Pictographic# E1.0 [4] (🔄..🔇) counterclockwise arrows button..muted speaker +1F508 ; Extended_Pictographic# E0.7 [1] (🔈) speaker low volume +1F509 ; Extended_Pictographic# E1.0 [1] (🔉) speaker medium volume +1F50A..1F514 ; Extended_Pictographic# E0.6 [11] (🔊..🔔) speaker high volume..bell +1F515 ; Extended_Pictographic# E1.0 [1] (🔕) bell with slash +1F516..1F52B ; Extended_Pictographic# E0.6 [22] (🔖..🔫) bookmark..water pistol +1F52C..1F52D ; Extended_Pictographic# E1.0 [2] (🔬..🔭) microscope..telescope +1F52E..1F53D ; Extended_Pictographic# E0.6 [16] (🔮..🔽) crystal ball..downwards button +1F546..1F548 ; Extended_Pictographic# E0.0 [3] (🕆..🕈) WHITE LATIN CROSS..CELTIC CROSS +1F549..1F54A ; Extended_Pictographic# E0.7 [2] (🕉️..🕊️) om..dove +1F54B..1F54E ; Extended_Pictographic# E1.0 [4] (🕋..🕎) kaaba..menorah +1F54F ; Extended_Pictographic# E0.0 [1] (🕏) BOWL OF HYGIEIA +1F550..1F55B ; Extended_Pictographic# E0.6 [12] (🕐..🕛) one o’clock..twelve o’clock +1F55C..1F567 ; Extended_Pictographic# E0.7 [12] (🕜..🕧) 
one-thirty..twelve-thirty +1F568..1F56E ; Extended_Pictographic# E0.0 [7] (🕨..🕮) RIGHT SPEAKER..BOOK +1F56F..1F570 ; Extended_Pictographic# E0.7 [2] (🕯️..🕰️) candle..mantelpiece clock +1F571..1F572 ; Extended_Pictographic# E0.0 [2] (🕱..🕲) BLACK SKULL AND CROSSBONES..NO PIRACY +1F573..1F579 ; Extended_Pictographic# E0.7 [7] (🕳️..🕹️) hole..joystick +1F57A ; Extended_Pictographic# E3.0 [1] (🕺) man dancing +1F57B..1F586 ; Extended_Pictographic# E0.0 [12] (🕻..🖆) LEFT HAND TELEPHONE RECEIVER..PEN OVER STAMPED ENVELOPE +1F587 ; Extended_Pictographic# E0.7 [1] (🖇️) linked paperclips +1F588..1F589 ; Extended_Pictographic# E0.0 [2] (🖈..🖉) BLACK PUSHPIN..LOWER LEFT PENCIL +1F58A..1F58D ; Extended_Pictographic# E0.7 [4] (🖊️..🖍️) pen..crayon +1F58E..1F58F ; Extended_Pictographic# E0.0 [2] (🖎..🖏) LEFT WRITING HAND..TURNED OK HAND SIGN +1F590 ; Extended_Pictographic# E0.7 [1] (🖐️) hand with fingers splayed +1F591..1F594 ; Extended_Pictographic# E0.0 [4] (🖑..🖔) REVERSED RAISED HAND WITH FINGERS SPLAYED..REVERSED VICTORY HAND +1F595..1F596 ; Extended_Pictographic# E1.0 [2] (🖕..🖖) middle finger..vulcan salute +1F597..1F5A3 ; Extended_Pictographic# E0.0 [13] (🖗..🖣) WHITE DOWN POINTING LEFT HAND INDEX..BLACK DOWN POINTING BACKHAND INDEX +1F5A4 ; Extended_Pictographic# E3.0 [1] (🖤) black heart +1F5A5 ; Extended_Pictographic# E0.7 [1] (🖥️) desktop computer +1F5A6..1F5A7 ; Extended_Pictographic# E0.0 [2] (🖦..🖧) KEYBOARD AND MOUSE..THREE NETWORKED COMPUTERS +1F5A8 ; Extended_Pictographic# E0.7 [1] (🖨️) printer +1F5A9..1F5B0 ; Extended_Pictographic# E0.0 [8] (🖩..🖰) POCKET CALCULATOR..TWO BUTTON MOUSE +1F5B1..1F5B2 ; Extended_Pictographic# E0.7 [2] (🖱️..🖲️) computer mouse..trackball +1F5B3..1F5BB ; Extended_Pictographic# E0.0 [9] (🖳..🖻) OLD PERSONAL COMPUTER..DOCUMENT WITH PICTURE +1F5BC ; Extended_Pictographic# E0.7 [1] (🖼️) framed picture +1F5BD..1F5C1 ; Extended_Pictographic# E0.0 [5] (🖽..🗁) FRAME WITH TILES..OPEN FOLDER +1F5C2..1F5C4 ; Extended_Pictographic# E0.7 [3] (🗂️..🗄️) card index 
dividers..file cabinet +1F5C5..1F5D0 ; Extended_Pictographic# E0.0 [12] (🗅..🗐) EMPTY NOTE..PAGES +1F5D1..1F5D3 ; Extended_Pictographic# E0.7 [3] (🗑️..🗓️) wastebasket..spiral calendar +1F5D4..1F5DB ; Extended_Pictographic# E0.0 [8] (🗔..🗛) DESKTOP WINDOW..DECREASE FONT SIZE SYMBOL +1F5DC..1F5DE ; Extended_Pictographic# E0.7 [3] (🗜️..🗞️) clamp..rolled-up newspaper +1F5DF..1F5E0 ; Extended_Pictographic# E0.0 [2] (🗟..🗠) PAGE WITH CIRCLED TEXT..STOCK CHART +1F5E1 ; Extended_Pictographic# E0.7 [1] (🗡️) dagger +1F5E2 ; Extended_Pictographic# E0.0 [1] (🗢) LIPS +1F5E3 ; Extended_Pictographic# E0.7 [1] (🗣️) speaking head +1F5E4..1F5E7 ; Extended_Pictographic# E0.0 [4] (🗤..🗧) THREE RAYS ABOVE..THREE RAYS RIGHT +1F5E8 ; Extended_Pictographic# E2.0 [1] (🗨️) left speech bubble +1F5E9..1F5EE ; Extended_Pictographic# E0.0 [6] (🗩..🗮) RIGHT SPEECH BUBBLE..LEFT ANGER BUBBLE +1F5EF ; Extended_Pictographic# E0.7 [1] (🗯️) right anger bubble +1F5F0..1F5F2 ; Extended_Pictographic# E0.0 [3] (🗰..🗲) MOOD BUBBLE..LIGHTNING MOOD +1F5F3 ; Extended_Pictographic# E0.7 [1] (🗳️) ballot box with ballot +1F5F4..1F5F9 ; Extended_Pictographic# E0.0 [6] (🗴..🗹) BALLOT SCRIPT X..BALLOT BOX WITH BOLD CHECK +1F5FA ; Extended_Pictographic# E0.7 [1] (🗺️) world map +1F5FB..1F5FF ; Extended_Pictographic# E0.6 [5] (🗻..🗿) mount fuji..moai +1F600 ; Extended_Pictographic# E1.0 [1] (😀) grinning face +1F601..1F606 ; Extended_Pictographic# E0.6 [6] (😁..😆) beaming face with smiling eyes..grinning squinting face +1F607..1F608 ; Extended_Pictographic# E1.0 [2] (😇..😈) smiling face with halo..smiling face with horns +1F609..1F60D ; Extended_Pictographic# E0.6 [5] (😉..😍) winking face..smiling face with heart-eyes +1F60E ; Extended_Pictographic# E1.0 [1] (😎) smiling face with sunglasses +1F60F ; Extended_Pictographic# E0.6 [1] (😏) smirking face +1F610 ; Extended_Pictographic# E0.7 [1] (😐) neutral face +1F611 ; Extended_Pictographic# E1.0 [1] (😑) expressionless face +1F612..1F614 ; Extended_Pictographic# E0.6 [3] (😒..😔) 
unamused face..pensive face +1F615 ; Extended_Pictographic# E1.0 [1] (😕) confused face +1F616 ; Extended_Pictographic# E0.6 [1] (😖) confounded face +1F617 ; Extended_Pictographic# E1.0 [1] (😗) kissing face +1F618 ; Extended_Pictographic# E0.6 [1] (😘) face blowing a kiss +1F619 ; Extended_Pictographic# E1.0 [1] (😙) kissing face with smiling eyes +1F61A ; Extended_Pictographic# E0.6 [1] (😚) kissing face with closed eyes +1F61B ; Extended_Pictographic# E1.0 [1] (😛) face with tongue +1F61C..1F61E ; Extended_Pictographic# E0.6 [3] (😜..😞) winking face with tongue..disappointed face +1F61F ; Extended_Pictographic# E1.0 [1] (😟) worried face +1F620..1F625 ; Extended_Pictographic# E0.6 [6] (😠..😥) angry face..sad but relieved face +1F626..1F627 ; Extended_Pictographic# E1.0 [2] (😦..😧) frowning face with open mouth..anguished face +1F628..1F62B ; Extended_Pictographic# E0.6 [4] (😨..😫) fearful face..tired face +1F62C ; Extended_Pictographic# E1.0 [1] (😬) grimacing face +1F62D ; Extended_Pictographic# E0.6 [1] (😭) loudly crying face +1F62E..1F62F ; Extended_Pictographic# E1.0 [2] (😮..😯) face with open mouth..hushed face +1F630..1F633 ; Extended_Pictographic# E0.6 [4] (😰..😳) anxious face with sweat..flushed face +1F634 ; Extended_Pictographic# E1.0 [1] (😴) sleeping face +1F635 ; Extended_Pictographic# E0.6 [1] (😵) face with crossed-out eyes +1F636 ; Extended_Pictographic# E1.0 [1] (😶) face without mouth +1F637..1F640 ; Extended_Pictographic# E0.6 [10] (😷..🙀) face with medical mask..weary cat +1F641..1F644 ; Extended_Pictographic# E1.0 [4] (🙁..🙄) slightly frowning face..face with rolling eyes +1F645..1F64F ; Extended_Pictographic# E0.6 [11] (🙅..🙏) person gesturing NO..folded hands +1F680 ; Extended_Pictographic# E0.6 [1] (🚀) rocket +1F681..1F682 ; Extended_Pictographic# E1.0 [2] (🚁..🚂) helicopter..locomotive +1F683..1F685 ; Extended_Pictographic# E0.6 [3] (🚃..🚅) railway car..bullet train +1F686 ; Extended_Pictographic# E1.0 [1] (🚆) train +1F687 ; Extended_Pictographic# E0.6 [1] 
(🚇) metro +1F688 ; Extended_Pictographic# E1.0 [1] (🚈) light rail +1F689 ; Extended_Pictographic# E0.6 [1] (🚉) station +1F68A..1F68B ; Extended_Pictographic# E1.0 [2] (🚊..🚋) tram..tram car +1F68C ; Extended_Pictographic# E0.6 [1] (🚌) bus +1F68D ; Extended_Pictographic# E0.7 [1] (🚍) oncoming bus +1F68E ; Extended_Pictographic# E1.0 [1] (🚎) trolleybus +1F68F ; Extended_Pictographic# E0.6 [1] (🚏) bus stop +1F690 ; Extended_Pictographic# E1.0 [1] (🚐) minibus +1F691..1F693 ; Extended_Pictographic# E0.6 [3] (🚑..🚓) ambulance..police car +1F694 ; Extended_Pictographic# E0.7 [1] (🚔) oncoming police car +1F695 ; Extended_Pictographic# E0.6 [1] (🚕) taxi +1F696 ; Extended_Pictographic# E1.0 [1] (🚖) oncoming taxi +1F697 ; Extended_Pictographic# E0.6 [1] (🚗) automobile +1F698 ; Extended_Pictographic# E0.7 [1] (🚘) oncoming automobile +1F699..1F69A ; Extended_Pictographic# E0.6 [2] (🚙..🚚) sport utility vehicle..delivery truck +1F69B..1F6A1 ; Extended_Pictographic# E1.0 [7] (🚛..🚡) articulated lorry..aerial tramway +1F6A2 ; Extended_Pictographic# E0.6 [1] (🚢) ship +1F6A3 ; Extended_Pictographic# E1.0 [1] (🚣) person rowing boat +1F6A4..1F6A5 ; Extended_Pictographic# E0.6 [2] (🚤..🚥) speedboat..horizontal traffic light +1F6A6 ; Extended_Pictographic# E1.0 [1] (🚦) vertical traffic light +1F6A7..1F6AD ; Extended_Pictographic# E0.6 [7] (🚧..🚭) construction..no smoking +1F6AE..1F6B1 ; Extended_Pictographic# E1.0 [4] (🚮..🚱) litter in bin sign..non-potable water +1F6B2 ; Extended_Pictographic# E0.6 [1] (🚲) bicycle +1F6B3..1F6B5 ; Extended_Pictographic# E1.0 [3] (🚳..🚵) no bicycles..person mountain biking +1F6B6 ; Extended_Pictographic# E0.6 [1] (🚶) person walking +1F6B7..1F6B8 ; Extended_Pictographic# E1.0 [2] (🚷..🚸) no pedestrians..children crossing +1F6B9..1F6BE ; Extended_Pictographic# E0.6 [6] (🚹..🚾) men’s room..water closet +1F6BF ; Extended_Pictographic# E1.0 [1] (🚿) shower +1F6C0 ; Extended_Pictographic# E0.6 [1] (🛀) person taking bath +1F6C1..1F6C5 ; Extended_Pictographic# E1.0 [5] 
(🛁..🛅) bathtub..left luggage +1F6C6..1F6CA ; Extended_Pictographic# E0.0 [5] (🛆..🛊) TRIANGLE WITH ROUNDED CORNERS..GIRLS SYMBOL +1F6CB ; Extended_Pictographic# E0.7 [1] (🛋️) couch and lamp +1F6CC ; Extended_Pictographic# E1.0 [1] (🛌) person in bed +1F6CD..1F6CF ; Extended_Pictographic# E0.7 [3] (🛍️..🛏️) shopping bags..bed +1F6D0 ; Extended_Pictographic# E1.0 [1] (🛐) place of worship +1F6D1..1F6D2 ; Extended_Pictographic# E3.0 [2] (🛑..🛒) stop sign..shopping cart +1F6D3..1F6D4 ; Extended_Pictographic# E0.0 [2] (🛓..🛔) STUPA..PAGODA +1F6D5 ; Extended_Pictographic# E12.0 [1] (🛕) hindu temple +1F6D6..1F6D7 ; Extended_Pictographic# E13.0 [2] (🛖..🛗) hut..elevator +1F6D8..1F6DC ; Extended_Pictographic# E0.0 [5] (🛘..🛜) .. +1F6DD..1F6DF ; Extended_Pictographic# E14.0 [3] (🛝..🛟) playground slide..ring buoy +1F6E0..1F6E5 ; Extended_Pictographic# E0.7 [6] (🛠️..🛥️) hammer and wrench..motor boat +1F6E6..1F6E8 ; Extended_Pictographic# E0.0 [3] (🛦..🛨) UP-POINTING MILITARY AIRPLANE..UP-POINTING SMALL AIRPLANE +1F6E9 ; Extended_Pictographic# E0.7 [1] (🛩️) small airplane +1F6EA ; Extended_Pictographic# E0.0 [1] (🛪) NORTHEAST-POINTING AIRPLANE +1F6EB..1F6EC ; Extended_Pictographic# E1.0 [2] (🛫..🛬) airplane departure..airplane arrival +1F6ED..1F6EF ; Extended_Pictographic# E0.0 [3] (🛭..🛯) .. +1F6F0 ; Extended_Pictographic# E0.7 [1] (🛰️) satellite +1F6F1..1F6F2 ; Extended_Pictographic# E0.0 [2] (🛱..🛲) ONCOMING FIRE ENGINE..DIESEL LOCOMOTIVE +1F6F3 ; Extended_Pictographic# E0.7 [1] (🛳️) passenger ship +1F6F4..1F6F6 ; Extended_Pictographic# E3.0 [3] (🛴..🛶) kick scooter..canoe +1F6F7..1F6F8 ; Extended_Pictographic# E5.0 [2] (🛷..🛸) sled..flying saucer +1F6F9 ; Extended_Pictographic# E11.0 [1] (🛹) skateboard +1F6FA ; Extended_Pictographic# E12.0 [1] (🛺) auto rickshaw +1F6FB..1F6FC ; Extended_Pictographic# E13.0 [2] (🛻..🛼) pickup truck..roller skate +1F6FD..1F6FF ; Extended_Pictographic# E0.0 [3] (🛽..🛿) .. +1F774..1F77F ; Extended_Pictographic# E0.0 [12] (🝴..🝿) .. 
+1F7D5..1F7DF ; Extended_Pictographic# E0.0 [11] (🟕..🟟) CIRCLED TRIANGLE.. +1F7E0..1F7EB ; Extended_Pictographic# E12.0 [12] (🟠..🟫) orange circle..brown square +1F7EC..1F7EF ; Extended_Pictographic# E0.0 [4] (🟬..🟯) .. +1F7F0 ; Extended_Pictographic# E14.0 [1] (🟰) heavy equals sign +1F7F1..1F7FF ; Extended_Pictographic# E0.0 [15] (🟱..🟿) .. +1F80C..1F80F ; Extended_Pictographic# E0.0 [4] (🠌..🠏) .. +1F848..1F84F ; Extended_Pictographic# E0.0 [8] (🡈..🡏) .. +1F85A..1F85F ; Extended_Pictographic# E0.0 [6] (🡚..🡟) .. +1F888..1F88F ; Extended_Pictographic# E0.0 [8] (🢈..🢏) .. +1F8AE..1F8FF ; Extended_Pictographic# E0.0 [82] (🢮..🣿) .. +1F90C ; Extended_Pictographic# E13.0 [1] (🤌) pinched fingers +1F90D..1F90F ; Extended_Pictographic# E12.0 [3] (🤍..🤏) white heart..pinching hand +1F910..1F918 ; Extended_Pictographic# E1.0 [9] (🤐..🤘) zipper-mouth face..sign of the horns +1F919..1F91E ; Extended_Pictographic# E3.0 [6] (🤙..🤞) call me hand..crossed fingers +1F91F ; Extended_Pictographic# E5.0 [1] (🤟) love-you gesture +1F920..1F927 ; Extended_Pictographic# E3.0 [8] (🤠..🤧) cowboy hat face..sneezing face +1F928..1F92F ; Extended_Pictographic# E5.0 [8] (🤨..🤯) face with raised eyebrow..exploding head +1F930 ; Extended_Pictographic# E3.0 [1] (🤰) pregnant woman +1F931..1F932 ; Extended_Pictographic# E5.0 [2] (🤱..🤲) breast-feeding..palms up together +1F933..1F93A ; Extended_Pictographic# E3.0 [8] (🤳..🤺) selfie..person fencing +1F93C..1F93E ; Extended_Pictographic# E3.0 [3] (🤼..🤾) people wrestling..person playing handball +1F93F ; Extended_Pictographic# E12.0 [1] (🤿) diving mask +1F940..1F945 ; Extended_Pictographic# E3.0 [6] (🥀..🥅) wilted flower..goal net +1F947..1F94B ; Extended_Pictographic# E3.0 [5] (🥇..🥋) 1st place medal..martial arts uniform +1F94C ; Extended_Pictographic# E5.0 [1] (🥌) curling stone +1F94D..1F94F ; Extended_Pictographic# E11.0 [3] (🥍..🥏) lacrosse..flying disc +1F950..1F95E ; Extended_Pictographic# E3.0 [15] (🥐..🥞) croissant..pancakes +1F95F..1F96B ; 
Extended_Pictographic# E5.0 [13] (🥟..🥫) dumpling..canned food +1F96C..1F970 ; Extended_Pictographic# E11.0 [5] (🥬..🥰) leafy green..smiling face with hearts +1F971 ; Extended_Pictographic# E12.0 [1] (🥱) yawning face +1F972 ; Extended_Pictographic# E13.0 [1] (🥲) smiling face with tear +1F973..1F976 ; Extended_Pictographic# E11.0 [4] (🥳..🥶) partying face..cold face +1F977..1F978 ; Extended_Pictographic# E13.0 [2] (🥷..🥸) ninja..disguised face +1F979 ; Extended_Pictographic# E14.0 [1] (🥹) face holding back tears +1F97A ; Extended_Pictographic# E11.0 [1] (🥺) pleading face +1F97B ; Extended_Pictographic# E12.0 [1] (🥻) sari +1F97C..1F97F ; Extended_Pictographic# E11.0 [4] (🥼..🥿) lab coat..flat shoe +1F980..1F984 ; Extended_Pictographic# E1.0 [5] (🦀..🦄) crab..unicorn +1F985..1F991 ; Extended_Pictographic# E3.0 [13] (🦅..🦑) eagle..squid +1F992..1F997 ; Extended_Pictographic# E5.0 [6] (🦒..🦗) giraffe..cricket +1F998..1F9A2 ; Extended_Pictographic# E11.0 [11] (🦘..🦢) kangaroo..swan +1F9A3..1F9A4 ; Extended_Pictographic# E13.0 [2] (🦣..🦤) mammoth..dodo +1F9A5..1F9AA ; Extended_Pictographic# E12.0 [6] (🦥..🦪) sloth..oyster +1F9AB..1F9AD ; Extended_Pictographic# E13.0 [3] (🦫..🦭) beaver..seal +1F9AE..1F9AF ; Extended_Pictographic# E12.0 [2] (🦮..🦯) guide dog..white cane +1F9B0..1F9B9 ; Extended_Pictographic# E11.0 [10] (🦰..🦹) red hair..supervillain +1F9BA..1F9BF ; Extended_Pictographic# E12.0 [6] (🦺..🦿) safety vest..mechanical leg +1F9C0 ; Extended_Pictographic# E1.0 [1] (🧀) cheese wedge +1F9C1..1F9C2 ; Extended_Pictographic# E11.0 [2] (🧁..🧂) cupcake..salt +1F9C3..1F9CA ; Extended_Pictographic# E12.0 [8] (🧃..🧊) beverage box..ice +1F9CB ; Extended_Pictographic# E13.0 [1] (🧋) bubble tea +1F9CC ; Extended_Pictographic# E14.0 [1] (🧌) troll +1F9CD..1F9CF ; Extended_Pictographic# E12.0 [3] (🧍..🧏) person standing..deaf person +1F9D0..1F9E6 ; Extended_Pictographic# E5.0 [23] (🧐..🧦) face with monocle..socks +1F9E7..1F9FF ; Extended_Pictographic# E11.0 [25] (🧧..🧿) red envelope..nazar amulet 
+1FA00..1FA6F ; Extended_Pictographic# E0.0 [112] (🨀..🩯) NEUTRAL CHESS KING.. +1FA70..1FA73 ; Extended_Pictographic# E12.0 [4] (🩰..🩳) ballet shoes..shorts +1FA74 ; Extended_Pictographic# E13.0 [1] (🩴) thong sandal +1FA75..1FA77 ; Extended_Pictographic# E0.0 [3] (🩵..🩷) .. +1FA78..1FA7A ; Extended_Pictographic# E12.0 [3] (🩸..🩺) drop of blood..stethoscope +1FA7B..1FA7C ; Extended_Pictographic# E14.0 [2] (🩻..🩼) x-ray..crutch +1FA7D..1FA7F ; Extended_Pictographic# E0.0 [3] (🩽..🩿) .. +1FA80..1FA82 ; Extended_Pictographic# E12.0 [3] (🪀..🪂) yo-yo..parachute +1FA83..1FA86 ; Extended_Pictographic# E13.0 [4] (🪃..🪆) boomerang..nesting dolls +1FA87..1FA8F ; Extended_Pictographic# E0.0 [9] (🪇..🪏) .. +1FA90..1FA95 ; Extended_Pictographic# E12.0 [6] (🪐..🪕) ringed planet..banjo +1FA96..1FAA8 ; Extended_Pictographic# E13.0 [19] (🪖..🪨) military helmet..rock +1FAA9..1FAAC ; Extended_Pictographic# E14.0 [4] (🪩..🪬) mirror ball..hamsa +1FAAD..1FAAF ; Extended_Pictographic# E0.0 [3] (🪭..🪯) .. +1FAB0..1FAB6 ; Extended_Pictographic# E13.0 [7] (🪰..🪶) fly..feather +1FAB7..1FABA ; Extended_Pictographic# E14.0 [4] (🪷..🪺) lotus..nest with eggs +1FABB..1FABF ; Extended_Pictographic# E0.0 [5] (🪻..🪿) .. +1FAC0..1FAC2 ; Extended_Pictographic# E13.0 [3] (🫀..🫂) anatomical heart..people hugging +1FAC3..1FAC5 ; Extended_Pictographic# E14.0 [3] (🫃..🫅) pregnant man..person with crown +1FAC6..1FACF ; Extended_Pictographic# E0.0 [10] (🫆..🫏) .. +1FAD0..1FAD6 ; Extended_Pictographic# E13.0 [7] (🫐..🫖) blueberries..teapot +1FAD7..1FAD9 ; Extended_Pictographic# E14.0 [3] (🫗..🫙) pouring liquid..jar +1FADA..1FADF ; Extended_Pictographic# E0.0 [6] (🫚..🫟) .. +1FAE0..1FAE7 ; Extended_Pictographic# E14.0 [8] (🫠..🫧) melting face..bubbles +1FAE8..1FAEF ; Extended_Pictographic# E0.0 [8] (🫨..🫯) .. +1FAF0..1FAF6 ; Extended_Pictographic# E14.0 [7] (🫰..🫶) hand with index finger and thumb crossed..heart hands +1FAF7..1FAFF ; Extended_Pictographic# E0.0 [9] (🫷..🫿) .. +1FC00..1FFFD ; Extended_Pictographic# E0.0[1022] (🰀..🿽) .. 
+ +# Total elements: 3537 + +#EOF diff --git a/libcxx/utils/generate_extended_grapheme_cluster_table.py b/libcxx/utils/generate_extended_grapheme_cluster_table.py --- a/libcxx/utils/generate_extended_grapheme_cluster_table.py +++ b/libcxx/utils/generate_extended_grapheme_cluster_table.py @@ -18,6 +18,7 @@ from dataclasses import dataclass, field from typing import Optional import re +import sys @dataclass @@ -298,8 +299,15 @@ Both files are expected to be in the same directory as this script. """ - gbp_data_path = Path(__file__).absolute().with_name("GraphemeBreakProperty.txt") - emoji_data_path = Path(__file__).absolute().with_name("emoji-data.txt") + gbp_data_path = ( + Path(__file__).absolute().parent + / "data" + / "unicode" + / "GraphemeBreakProperty.txt" + ) + emoji_data_path = ( + Path(__file__).absolute().parent / "data" / "unicode" / "emoji-data.txt" + ) gbp_ranges = list() emoji_ranges = list() with gbp_data_path.open(encoding="utf-8") as f: @@ -317,6 +325,8 @@ if __name__ == "__main__": + if len(sys.argv) == 2: + sys.stdout = open(sys.argv[1], "w") print( MSVC_FORMAT_UCD_TABLES_HPP_TEMPLATE.lstrip().format( content=generate_data_tables() diff --git a/libcxx/utils/generate_extended_grapheme_cluster_test.py b/libcxx/utils/generate_extended_grapheme_cluster_test.py --- a/libcxx/utils/generate_extended_grapheme_cluster_test.py +++ b/libcxx/utils/generate_extended_grapheme_cluster_test.py @@ -17,6 +17,7 @@ from dataclasses import dataclass, field from typing import Optional, TextIO from array import array +import sys @dataclass @@ -229,7 +230,9 @@ def generate_all() -> str: test_data_path = Path(__file__) test_data_path = test_data_path.absolute() - test_data_path = test_data_path.with_name("GraphemeBreakTest.txt") + test_data_path = ( + test_data_path.parent / "data" / "unicode" / "GraphemeBreakTest.txt" + ) lines = list() with open(test_data_path, mode="rt", encoding="utf-8") as file: while line := parseBreakTestLine(file): @@ -244,4 +247,6 @@ if __name__ 
== "__main__": + if len(sys.argv) == 2: + sys.stdout = open(sys.argv[1], "w") print(generate_all()) diff --git a/libcxx/utils/libcxx/test/params.py b/libcxx/utils/libcxx/test/params.py --- a/libcxx/utils/libcxx/test/params.py +++ b/libcxx/utils/libcxx/test/params.py @@ -147,8 +147,7 @@ AddFeature('asan') if sanitizer == 'Address' else None, AddFlag('-fsanitize=hwaddress') if sanitizer == 'HWAddress' else None, - # FIXME: Use hwasan feature. - AddFeature('asan') if sanitizer == 'HWAddress' else None, + AddFeature('hwasan') if sanitizer == 'HWAddress' else None, AddFlag('-fsanitize=memory') if sanitizer in ['Memory', 'MemoryWithOrigins'] else None, AddFeature('msan') if sanitizer in ['Memory', 'MemoryWithOrigins'] else None, @@ -160,7 +159,7 @@ AddFlag('-fsanitize=dataflow') if sanitizer == 'DataFlow' else None, AddFlag('-fsanitize=leaks') if sanitizer == 'Leaks' else None, - AddFeature('sanitizer-new-delete') if sanitizer in ['Address', 'Memory', 'MemoryWithOrigins', 'Thread'] else None, + AddFeature('sanitizer-new-delete') if sanitizer in ['Address', 'HWAddress', 'Memory', 'MemoryWithOrigins', 'Thread'] else None, ])), Parameter(name='enable_experimental', choices=[True, False], type=bool, default=True, diff --git a/lld/test/ELF/edata-etext.s b/lld/test/ELF/edata-etext.s --- a/lld/test/ELF/edata-etext.s +++ b/lld/test/ELF/edata-etext.s @@ -37,7 +37,7 @@ ## If a relocatable object file defines non-reserved identifiers (by C and C++) ## edata/end/etext, don't redefine them. Note: GNU ld redefines the reserved -## _edata while we don't for simplicty. +## _edata while we don't for simplicity. 
# RUN: ld.lld %t/b.o -o %t/b # RUN: llvm-objdump -t %t/b | FileCheck %s --check-prefix=CHECK2 # RUN: ld.lld %t/c.o -o %t/c diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp --- a/lld/wasm/SyntheticSections.cpp +++ b/lld/wasm/SyntheticSections.cpp @@ -844,8 +844,7 @@ {std::make_pair(&info.Languages, &languages), std::make_pair(&info.Tools, &tools), std::make_pair(&info.SDKs, &sDKs)}) for (auto &producer : *producers.first) - if (producers.second->end() == - llvm::find_if(*producers.second, + if (llvm::none_of(*producers.second, [&](std::pair seen) { return seen.first == producer.first; })) diff --git a/lldb/examples/customization/bin-utils/binutils.py b/lldb/examples/customization/bin-utils/binutils.py --- a/lldb/examples/customization/bin-utils/binutils.py +++ b/lldb/examples/customization/bin-utils/binutils.py @@ -1,7 +1,5 @@ "Collection of tools for displaying bit representation of numbers.""" -from __future__ import print_function - def binary(n, width=None): """ Return a list of (0|1)'s for the binary representation of n where n >= 0. 
diff --git a/lldb/examples/customization/import-python/importcmd.py b/lldb/examples/customization/import-python/importcmd.py --- a/lldb/examples/customization/import-python/importcmd.py +++ b/lldb/examples/customization/import-python/importcmd.py @@ -1,4 +1,3 @@ -from __future__ import print_function import sys import os import lldb diff --git a/lldb/examples/customization/pwd-cd-and-system/utils.py b/lldb/examples/customization/pwd-cd-and-system/utils.py --- a/lldb/examples/customization/pwd-cd-and-system/utils.py +++ b/lldb/examples/customization/pwd-cd-and-system/utils.py @@ -1,5 +1,4 @@ """Utility for changing directories and execution of commands in a subshell.""" -from __future__ import print_function import os import shlex diff --git a/lldb/examples/darwin/heap_find/heap.py b/lldb/examples/darwin/heap_find/heap.py --- a/lldb/examples/darwin/heap_find/heap.py +++ b/lldb/examples/darwin/heap_find/heap.py @@ -8,7 +8,6 @@ # (lldb) script import lldb.macosx.heap #---------------------------------------------------------------------- -from __future__ import print_function import lldb import optparse import os diff --git a/lldb/examples/python/bsd.py b/lldb/examples/python/bsd.py --- a/lldb/examples/python/bsd.py +++ b/lldb/examples/python/bsd.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -from __future__ import print_function import cmd import optparse diff --git a/lldb/examples/python/cmdtemplate.py b/lldb/examples/python/cmdtemplate.py --- a/lldb/examples/python/cmdtemplate.py +++ b/lldb/examples/python/cmdtemplate.py @@ -9,8 +9,6 @@ # (lldb) command script import /path/to/cmdtemplate.py # --------------------------------------------------------------------- -from __future__ import print_function - import inspect import lldb import optparse diff --git a/lldb/examples/python/delta.py b/lldb/examples/python/delta.py --- a/lldb/examples/python/delta.py +++ b/lldb/examples/python/delta.py @@ -16,8 +16,6 @@ # available. 
#---------------------------------------------------------------------- -from __future__ import print_function - import optparse import os import shlex diff --git a/lldb/examples/python/diagnose_nsstring.py b/lldb/examples/python/diagnose_nsstring.py --- a/lldb/examples/python/diagnose_nsstring.py +++ b/lldb/examples/python/diagnose_nsstring.py @@ -4,8 +4,6 @@ # decisions it did and providing some useful context information that can # be used for improving the formatter -from __future__ import print_function - import lldb diff --git a/lldb/examples/python/diagnose_unwind.py b/lldb/examples/python/diagnose_unwind.py --- a/lldb/examples/python/diagnose_unwind.py +++ b/lldb/examples/python/diagnose_unwind.py @@ -5,7 +5,6 @@ # information about the stack frames, and tries an alternate unwind # algorithm, that will help to understand why lldb's unwind algorithm # did not succeed. -from __future__ import print_function import optparse import lldb diff --git a/lldb/examples/python/gdbremote.py b/lldb/examples/python/gdbremote.py --- a/lldb/examples/python/gdbremote.py +++ b/lldb/examples/python/gdbremote.py @@ -16,7 +16,6 @@ # available. #---------------------------------------------------------------------- -from __future__ import print_function import binascii import subprocess import json diff --git a/lldb/examples/python/globals.py b/lldb/examples/python/globals.py --- a/lldb/examples/python/globals.py +++ b/lldb/examples/python/globals.py @@ -7,7 +7,6 @@ # For the shells sh, bash: # PYTHONPATH=/Applications/Xcode.app/Contents/SharedFrameworks/LLDB.framework/Resources/Python ./globals.py [ ...] 
#---------------------------------------------------------------------- -from __future__ import print_function import lldb import optparse diff --git a/lldb/examples/python/jump.py b/lldb/examples/python/jump.py --- a/lldb/examples/python/jump.py +++ b/lldb/examples/python/jump.py @@ -1,5 +1,3 @@ -from __future__ import print_function - import lldb import re diff --git a/lldb/examples/python/lldb_module_utils.py b/lldb/examples/python/lldb_module_utils.py --- a/lldb/examples/python/lldb_module_utils.py +++ b/lldb/examples/python/lldb_module_utils.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -from __future__ import print_function import lldb import optparse diff --git a/lldb/examples/python/lldbtk.py b/lldb/examples/python/lldbtk.py --- a/lldb/examples/python/lldbtk.py +++ b/lldb/examples/python/lldbtk.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -from __future__ import print_function import lldb import shlex diff --git a/lldb/examples/python/mach_o.py b/lldb/examples/python/mach_o.py --- a/lldb/examples/python/mach_o.py +++ b/lldb/examples/python/mach_o.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -from __future__ import print_function import cmd import dict_utils diff --git a/lldb/examples/python/memory.py b/lldb/examples/python/memory.py --- a/lldb/examples/python/memory.py +++ b/lldb/examples/python/memory.py @@ -9,8 +9,6 @@ # (lldb) command script import /path/to/cmdtemplate.py #---------------------------------------------------------------------- -from __future__ import print_function - import platform import os import re diff --git a/lldb/examples/python/performance.py b/lldb/examples/python/performance.py --- a/lldb/examples/python/performance.py +++ b/lldb/examples/python/performance.py @@ -8,8 +8,6 @@ # export PYTHONPATH=/Applications/Xcode.app/Contents/SharedFrameworks/LLDB.framework/Resources/Python #---------------------------------------------------------------------- -from __future__ import print_function - import optparse import os import platform diff --git 
a/lldb/examples/python/process_events.py b/lldb/examples/python/process_events.py --- a/lldb/examples/python/process_events.py +++ b/lldb/examples/python/process_events.py @@ -8,8 +8,6 @@ # export PYTHONPATH=/Applications/Xcode.app/Contents/SharedFrameworks/LLDB.framework/Resources/Python #---------------------------------------------------------------------- -from __future__ import print_function - import optparse import os import platform diff --git a/lldb/examples/python/pytracer.py b/lldb/examples/python/pytracer.py --- a/lldb/examples/python/pytracer.py +++ b/lldb/examples/python/pytracer.py @@ -1,4 +1,3 @@ -from __future__ import print_function import sys import inspect from collections import OrderedDict diff --git a/lldb/examples/python/scripted_step.py b/lldb/examples/python/scripted_step.py --- a/lldb/examples/python/scripted_step.py +++ b/lldb/examples/python/scripted_step.py @@ -93,8 +93,6 @@ # # (lldb) thread step-scripted -C scripted_step.StepWithPlan -from __future__ import print_function - import lldb diff --git a/lldb/examples/python/shadow.py b/lldb/examples/python/shadow.py --- a/lldb/examples/python/shadow.py +++ b/lldb/examples/python/shadow.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -from __future__ import print_function import lldb import shlex diff --git a/lldb/examples/python/sources.py b/lldb/examples/python/sources.py --- a/lldb/examples/python/sources.py +++ b/lldb/examples/python/sources.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -from __future__ import print_function import lldb import shlex diff --git a/lldb/examples/python/stacks.py b/lldb/examples/python/stacks.py --- a/lldb/examples/python/stacks.py +++ b/lldb/examples/python/stacks.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -from __future__ import print_function import lldb import optparse import shlex diff --git a/lldb/examples/python/symbolication.py b/lldb/examples/python/symbolication.py --- a/lldb/examples/python/symbolication.py +++ b/lldb/examples/python/symbolication.py @@ -26,7 
+26,6 @@ # PYTHONPATH=/path/to/LLDB.framework/Resources/Python ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash #---------------------------------------------------------------------- -from __future__ import print_function import lldb import optparse import os diff --git a/lldb/examples/python/types.py b/lldb/examples/python/types.py --- a/lldb/examples/python/types.py +++ b/lldb/examples/python/types.py @@ -9,8 +9,6 @@ # (lldb) command script import /path/to/cmdtemplate.py #---------------------------------------------------------------------- -from __future__ import print_function - import platform import os import re diff --git a/lldb/examples/scripting/tree_utils.py b/lldb/examples/scripting/tree_utils.py --- a/lldb/examples/scripting/tree_utils.py +++ b/lldb/examples/scripting/tree_utils.py @@ -18,8 +18,6 @@ http://lldb.llvm.org/scripting.html """ -from __future__ import print_function - def DFS(root, word, cur_path): """ diff --git a/lldb/examples/summaries/cocoa/CFBitVector.py b/lldb/examples/summaries/cocoa/CFBitVector.py --- a/lldb/examples/summaries/cocoa/CFBitVector.py +++ b/lldb/examples/summaries/cocoa/CFBitVector.py @@ -5,7 +5,6 @@ See https://llvm.org/LICENSE.txt for license information. 
SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception """ -from __future__ import print_function # summary provider for CF(Mutable)BitVector import lldb diff --git a/lldb/examples/summaries/cocoa/Logger.py b/lldb/examples/summaries/cocoa/Logger.py --- a/lldb/examples/summaries/cocoa/Logger.py +++ b/lldb/examples/summaries/cocoa/Logger.py @@ -1,4 +1,3 @@ -from __future__ import print_function import sys import os.path import inspect diff --git a/lldb/examples/summaries/cocoa/NSNumber.py b/lldb/examples/summaries/cocoa/NSNumber.py --- a/lldb/examples/summaries/cocoa/NSNumber.py +++ b/lldb/examples/summaries/cocoa/NSNumber.py @@ -8,8 +8,6 @@ # example summary provider for NSNumber # the real summary is now C++ code built into LLDB -from __future__ import print_function - import lldb import ctypes import lldb.runtime.objc.objc_runtime diff --git a/lldb/examples/synthetic/gnu_libstdcpp.py b/lldb/examples/synthetic/gnu_libstdcpp.py --- a/lldb/examples/synthetic/gnu_libstdcpp.py +++ b/lldb/examples/synthetic/gnu_libstdcpp.py @@ -1,4 +1,3 @@ -from __future__ import division import lldb.formatters.Logger # C++ STL formatters for LLDB diff --git a/lldb/include/lldb/API/SBType.h b/lldb/include/lldb/API/SBType.h --- a/lldb/include/lldb/API/SBType.h +++ b/lldb/include/lldb/API/SBType.h @@ -182,6 +182,8 @@ lldb::SBType GetTemplateArgumentType(uint32_t idx); + /// Return the TemplateArgumentKind of the template argument at index idx. + /// Variadic argument packs are automatically expanded. 
lldb::TemplateArgumentKind GetTemplateArgumentKind(uint32_t idx); lldb::SBType GetFunctionReturnType(); diff --git a/lldb/include/lldb/Core/Debugger.h b/lldb/include/lldb/Core/Debugger.h --- a/lldb/include/lldb/Core/Debugger.h +++ b/lldb/include/lldb/Core/Debugger.h @@ -82,6 +82,7 @@ eBroadcastBitProgress = (1 << 0), eBroadcastBitWarning = (1 << 1), eBroadcastBitError = (1 << 2), + eBroadcastSymbolChange = (1 << 3), }; static ConstString GetStaticBroadcasterClass(); @@ -430,6 +431,8 @@ llvm::Optional debugger_id = llvm::None, std::once_flag *once = nullptr); + static void ReportSymbolChange(const ModuleSpec &module_spec); + protected: friend class CommandInterpreter; friend class REPL; diff --git a/lldb/include/lldb/Core/DebuggerEvents.h b/lldb/include/lldb/Core/DebuggerEvents.h --- a/lldb/include/lldb/Core/DebuggerEvents.h +++ b/lldb/include/lldb/Core/DebuggerEvents.h @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "lldb/Core/ModuleSpec.h" #include "lldb/Utility/ConstString.h" #include "lldb/Utility/Event.h" @@ -82,6 +83,28 @@ const DiagnosticEventData &operator=(const DiagnosticEventData &) = delete; }; +class SymbolChangeEventData : public EventData { +public: + SymbolChangeEventData(lldb::DebuggerWP debugger_wp, ModuleSpec module_spec) + : m_debugger_wp(debugger_wp), m_module_spec(std::move(module_spec)) {} + + static ConstString GetFlavorString(); + ConstString GetFlavor() const override; + + static const SymbolChangeEventData * + GetEventDataFromEvent(const Event *event_ptr); + + void DoOnRemoval(Event *event_ptr) override; + +private: + lldb::DebuggerWP m_debugger_wp; + ModuleSpec m_module_spec; + + SymbolChangeEventData(const SymbolChangeEventData &) = delete; + const SymbolChangeEventData & + operator=(const SymbolChangeEventData &) = delete; +}; + } // namespace lldb_private #endif // LLDB_CORE_DEBUGGER_EVENTS_H diff --git a/lldb/include/lldb/Core/ModuleList.h 
b/lldb/include/lldb/Core/ModuleList.h --- a/lldb/include/lldb/Core/ModuleList.h +++ b/lldb/include/lldb/Core/ModuleList.h @@ -60,6 +60,7 @@ bool SetClangModulesCachePath(const FileSpec &path); bool GetEnableExternalLookup() const; bool SetEnableExternalLookup(bool new_value); + bool GetEnableBackgroundLookup() const; bool GetEnableLLDBIndexCache() const; bool SetEnableLLDBIndexCache(bool new_value); uint64_t GetLLDBIndexCacheMaxByteSize(); @@ -457,6 +458,8 @@ static void FindSharedModules(const ModuleSpec &module_spec, ModuleList &matching_module_list); + static lldb::ModuleSP FindSharedModule(const UUID &uuid); + static size_t RemoveOrphanSharedModules(bool mandatory); static bool RemoveSharedModuleIfOrphaned(const Module *module_ptr); diff --git a/lldb/include/lldb/Symbol/CompilerType.h b/lldb/include/lldb/Symbol/CompilerType.h --- a/lldb/include/lldb/Symbol/CompilerType.h +++ b/lldb/include/lldb/Symbol/CompilerType.h @@ -338,14 +338,28 @@ GetIndexOfChildMemberWithName(const char *name, bool omit_empty_base_classes, std::vector &child_indexes) const; - size_t GetNumTemplateArguments() const; - - lldb::TemplateArgumentKind GetTemplateArgumentKind(size_t idx) const; - CompilerType GetTypeTemplateArgument(size_t idx) const; + /// Return the number of template arguments the type has. + /// If expand_pack is true, then variadic argument packs are automatically + /// expanded to their supplied arguments. If it is false an argument pack + /// will only count as 1 argument. + size_t GetNumTemplateArguments(bool expand_pack = false) const; + + // Return the TemplateArgumentKind of the template argument at index idx. + // If expand_pack is true, then variadic argument packs are automatically + // expanded to their supplied arguments. With expand_pack set to false, an + // arguement pack will count as 1 argument and return a type of Pack. 
+ lldb::TemplateArgumentKind + GetTemplateArgumentKind(size_t idx, bool expand_pack = false) const; + CompilerType GetTypeTemplateArgument(size_t idx, + bool expand_pack = false) const; /// Returns the value of the template argument and its type. + /// If expand_pack is true, then variadic argument packs are automatically + /// expanded to their supplied arguments. With expand_pack set to false, an + /// arguement pack will count as 1 argument and it is invalid to call this + /// method on the pack argument. llvm::Optional - GetIntegralTemplateArgument(size_t idx) const; + GetIntegralTemplateArgument(size_t idx, bool expand_pack = false) const; CompilerType GetTypeForFormatters() const; diff --git a/lldb/include/lldb/Symbol/LocateSymbolFile.h b/lldb/include/lldb/Symbol/LocateSymbolFile.h --- a/lldb/include/lldb/Symbol/LocateSymbolFile.h +++ b/lldb/include/lldb/Symbol/LocateSymbolFile.h @@ -14,6 +14,7 @@ #include "lldb/Core/FileSpecList.h" #include "lldb/Utility/FileSpec.h" #include "lldb/Utility/Status.h" +#include "lldb/lldb-forward.h" namespace lldb_private { @@ -52,7 +53,15 @@ // static bool DownloadObjectAndSymbolFile(ModuleSpec &module_spec, Status &error, - bool force_lookup = true); + bool force_lookup = true, + bool copy_executable = true); + + /// Locate the symbol file for the given UUID on a background thread. This + /// function returns immediately. Under the hood it uses the debugger's + /// thread pool to call DownloadObjectAndSymbolFile. If a symbol file is + /// found, this will notify all target which contain the module with the + /// given UUID. 
+ static void DownloadSymbolFileAsync(const UUID &uuid); }; } // namespace lldb_private diff --git a/lldb/include/lldb/Symbol/TypeSystem.h b/lldb/include/lldb/Symbol/TypeSystem.h --- a/lldb/include/lldb/Symbol/TypeSystem.h +++ b/lldb/include/lldb/Symbol/TypeSystem.h @@ -346,14 +346,18 @@ const char *name, bool omit_empty_base_classes, std::vector &child_indexes) = 0; - virtual size_t GetNumTemplateArguments(lldb::opaque_compiler_type_t type); + virtual size_t GetNumTemplateArguments(lldb::opaque_compiler_type_t type, + bool expand_pack); virtual lldb::TemplateArgumentKind - GetTemplateArgumentKind(lldb::opaque_compiler_type_t type, size_t idx); - virtual CompilerType GetTypeTemplateArgument(lldb::opaque_compiler_type_t type, - size_t idx); + GetTemplateArgumentKind(lldb::opaque_compiler_type_t type, size_t idx, + bool expand_pack); + virtual CompilerType + GetTypeTemplateArgument(lldb::opaque_compiler_type_t type, size_t idx, + bool expand_pack); virtual llvm::Optional - GetIntegralTemplateArgument(lldb::opaque_compiler_type_t type, size_t idx); + GetIntegralTemplateArgument(lldb::opaque_compiler_type_t type, size_t idx, + bool expand_pack); // Dumping types diff --git a/lldb/include/lldb/Target/Target.h b/lldb/include/lldb/Target/Target.h --- a/lldb/include/lldb/Target/Target.h +++ b/lldb/include/lldb/Target/Target.h @@ -162,7 +162,7 @@ bool GetEnableNotifyAboutFixIts() const; FileSpec GetSaveJITObjectsDir() const; - + bool GetEnableSyntheticValue() const; uint32_t GetMaxZeroPaddingInFloatFormat() const; @@ -260,7 +260,7 @@ void DisableASLRValueChangedCallback(); void InheritTCCValueChangedCallback(); void DisableSTDIOValueChangedCallback(); - + // Settings checker for target.jit-save-objects-dir: void CheckJITObjectsDir(); @@ -479,7 +479,8 @@ eBroadcastBitModulesLoaded = (1 << 1), eBroadcastBitModulesUnloaded = (1 << 2), eBroadcastBitWatchpointChanged = (1 << 3), - eBroadcastBitSymbolsLoaded = (1 << 4) + eBroadcastBitSymbolsLoaded = (1 << 4), + 
eBroadcastBitSymbolsChanged = (1 << 5), }; // These two functions fill out the Broadcaster interface: @@ -981,7 +982,7 @@ ModuleIsExcludedForUnconstrainedSearches(const lldb::ModuleSP &module_sp); const ArchSpec &GetArchitecture() const { return m_arch.GetSpec(); } - + /// Returns the name of the target's ABI plugin. llvm::StringRef GetABIName() const; @@ -1425,30 +1426,30 @@ LazyBool pass = eLazyBoolCalculate; LazyBool notify = eLazyBoolCalculate; LazyBool stop = eLazyBoolCalculate; - DummySignalValues(LazyBool pass, LazyBool notify, LazyBool stop) : - pass(pass), notify(notify), stop(stop) {} + DummySignalValues(LazyBool pass, LazyBool notify, LazyBool stop) + : pass(pass), notify(notify), stop(stop) {} DummySignalValues() = default; }; using DummySignalElement = llvm::StringMapEntry; - static bool UpdateSignalFromDummy(lldb::UnixSignalsSP signals_sp, - const DummySignalElement &element); - static bool ResetSignalFromDummy(lldb::UnixSignalsSP signals_sp, - const DummySignalElement &element); + static bool UpdateSignalFromDummy(lldb::UnixSignalsSP signals_sp, + const DummySignalElement &element); + static bool ResetSignalFromDummy(lldb::UnixSignalsSP signals_sp, + const DummySignalElement &element); public: /// Add a signal to the Target's list of stored signals/actions. These /// values will get copied into any processes launched from /// this target. - void AddDummySignal(llvm::StringRef name, LazyBool pass, LazyBool print, + void AddDummySignal(llvm::StringRef name, LazyBool pass, LazyBool print, LazyBool stop); /// Updates the signals in signals_sp using the stored dummy signals. /// If warning_stream_sp is not null, if any stored signals are not found in /// the current process, a warning will be emitted here. 
- void UpdateSignalsFromDummy(lldb::UnixSignalsSP signals_sp, + void UpdateSignalsFromDummy(lldb::UnixSignalsSP signals_sp, lldb::StreamSP warning_stream_sp); /// Clear the dummy signals in signal_names from the target, or all signals /// if signal_names is empty. Also remove the behaviors they set from the - /// process's signals if it exists. + /// process's signals if it exists. void ClearDummySignals(Args &signal_names); /// Print all the signals set in this target. void PrintDummySignals(Stream &strm, Args &signals); @@ -1533,7 +1534,7 @@ lldb::TraceSP m_trace_sp; /// Stores the frame recognizers of this target. lldb::StackFrameRecognizerManagerUP m_frame_recognizer_manager_up; - /// These are used to set the signal state when you don't have a process and + /// These are used to set the signal state when you don't have a process and /// more usefully in the Dummy target where you can't know exactly what /// signals you will have. llvm::StringMap m_dummy_signals; diff --git a/lldb/source/API/SBType.cpp b/lldb/source/API/SBType.cpp --- a/lldb/source/API/SBType.cpp +++ b/lldb/source/API/SBType.cpp @@ -542,7 +542,8 @@ LLDB_INSTRUMENT_VA(this); if (IsValid()) - return m_opaque_sp->GetCompilerType(false).GetNumTemplateArguments(); + return m_opaque_sp->GetCompilerType(false).GetNumTemplateArguments( + /*expand_pack=*/true); return 0; } @@ -553,13 +554,15 @@ return SBType(); CompilerType type; + const bool expand_pack = true; switch(GetTemplateArgumentKind(idx)) { case eTemplateArgumentKindType: - type = m_opaque_sp->GetCompilerType(false).GetTypeTemplateArgument(idx); + type = m_opaque_sp->GetCompilerType(false).GetTypeTemplateArgument( + idx, expand_pack); break; case eTemplateArgumentKindIntegral: type = m_opaque_sp->GetCompilerType(false) - .GetIntegralTemplateArgument(idx) + .GetIntegralTemplateArgument(idx, expand_pack) ->type; break; default: @@ -574,7 +577,8 @@ LLDB_INSTRUMENT_VA(this, idx); if (IsValid()) - return 
m_opaque_sp->GetCompilerType(false).GetTemplateArgumentKind(idx); + return m_opaque_sp->GetCompilerType(false).GetTemplateArgumentKind( + idx, /*expand_pack=*/true); return eTemplateArgumentKindNull; } diff --git a/lldb/source/Core/CoreProperties.td b/lldb/source/Core/CoreProperties.td --- a/lldb/source/Core/CoreProperties.td +++ b/lldb/source/Core/CoreProperties.td @@ -5,6 +5,10 @@ Global, DefaultTrue, Desc<"Control the use of external tools and repositories to locate symbol files. Directories listed in target.debug-file-search-paths and directory of the executable are always checked first for separate debug info files. Then depending on this setting: On macOS, Spotlight would be also used to locate a matching .dSYM bundle based on the UUID of the executable. On NetBSD, directory /usr/libdata/debug would be also searched. On platforms other than NetBSD directory /usr/lib/debug would be also searched.">; + def EnableBackgroundLookup: Property<"enable-background-lookup", "Boolean">, + Global, + DefaultFalse, + Desc<"On macOS, enable calling dsymForUUID (or an equivalent script/binary) in the background to locate symbol files that weren't found.">; def ClangModulesCachePath: Property<"clang-modules-cache-path", "FileSpec">, Global, DefaultStringValue<"">, diff --git a/lldb/source/Core/Debugger.cpp b/lldb/source/Core/Debugger.cpp --- a/lldb/source/Core/Debugger.cpp +++ b/lldb/source/Core/Debugger.cpp @@ -13,6 +13,7 @@ #include "lldb/Core/FormatEntity.h" #include "lldb/Core/Mangled.h" #include "lldb/Core/ModuleList.h" +#include "lldb/Core/ModuleSpec.h" #include "lldb/Core/PluginManager.h" #include "lldb/Core/StreamAsynchronousIO.h" #include "lldb/Core/StreamFile.h" @@ -104,6 +105,7 @@ nullptr; // NOTE: intentional leak to avoid issues with C++ destructor chain static DebuggerList *g_debugger_list_ptr = nullptr; // NOTE: intentional leak to avoid issues with C++ destructor chain +static llvm::ThreadPool *g_thread_pool = nullptr; static constexpr OptionEnumValueElement 
g_show_disassembly_enum_values[] = { { @@ -538,6 +540,7 @@ "Debugger::Initialize called more than once!"); g_debugger_list_mutex_ptr = new std::recursive_mutex(); g_debugger_list_ptr = new DebuggerList(); + g_thread_pool = new llvm::ThreadPool(llvm::optimal_concurrency()); g_load_plugin_callback = load_plugin_callback; } @@ -545,6 +548,11 @@ assert(g_debugger_list_ptr && "Debugger::Terminate called without a matching Debugger::Initialize!"); + if (g_thread_pool) { + // The destructor will wait for all the threads to complete. + delete g_thread_pool; + } + if (g_debugger_list_ptr && g_debugger_list_mutex_ptr) { // Clear our global list of debugger objects { @@ -1406,6 +1414,18 @@ debugger_id, once); } +void Debugger::ReportSymbolChange(const ModuleSpec &module_spec) { + if (g_debugger_list_ptr && g_debugger_list_mutex_ptr) { + std::lock_guard guard(*g_debugger_list_mutex_ptr); + for (DebuggerSP debugger_sp : *g_debugger_list_ptr) { + EventSP event_sp = std::make_shared( + Debugger::eBroadcastSymbolChange, + new SymbolChangeEventData(debugger_sp, module_spec)); + debugger_sp->GetBroadcaster().BroadcastEvent(event_sp); + } + } +} + static std::shared_ptr CreateLogHandler(LogHandlerKind log_handler_kind, int fd, bool should_close, size_t buffer_size) { @@ -1702,8 +1722,8 @@ CommandInterpreter::eBroadcastBitAsynchronousErrorData); listener_sp->StartListeningForEvents( - &m_broadcaster, - eBroadcastBitProgress | eBroadcastBitWarning | eBroadcastBitError); + &m_broadcaster, eBroadcastBitProgress | eBroadcastBitWarning | + eBroadcastBitError | eBroadcastSymbolChange); // Let the thread that spawned us know that we have started up and that we // are now listening to all required events so no events get missed @@ -2005,11 +2025,7 @@ } llvm::ThreadPool &Debugger::GetThreadPool() { - // NOTE: intentional leak to avoid issues with C++ destructor chain - static llvm::ThreadPool *g_thread_pool = nullptr; - static llvm::once_flag g_once_flag; - llvm::call_once(g_once_flag, []() { 
- g_thread_pool = new llvm::ThreadPool(llvm::optimal_concurrency()); - }); + assert(g_thread_pool && + "Debugger::GetThreadPool called before Debugger::Initialize"); return *g_thread_pool; } diff --git a/lldb/source/Core/DebuggerEvents.cpp b/lldb/source/Core/DebuggerEvents.cpp --- a/lldb/source/Core/DebuggerEvents.cpp +++ b/lldb/source/Core/DebuggerEvents.cpp @@ -7,9 +7,12 @@ //===----------------------------------------------------------------------===// #include "lldb/Core/DebuggerEvents.h" +#include "lldb/Core/Debugger.h" +#include "lldb/Core/Module.h" #include "llvm/Support/WithColor.h" using namespace lldb_private; +using namespace lldb; template static const T *GetEventDataFromEventImpl(const Event *event_ptr) { @@ -79,3 +82,37 @@ DiagnosticEventData::GetEventDataFromEvent(const Event *event_ptr) { return GetEventDataFromEventImpl(event_ptr); } + +ConstString SymbolChangeEventData::GetFlavorString() { + static ConstString g_flavor("SymbolChangeEventData"); + return g_flavor; +} + +ConstString SymbolChangeEventData::GetFlavor() const { + return SymbolChangeEventData::GetFlavorString(); +} + +const SymbolChangeEventData * +SymbolChangeEventData::GetEventDataFromEvent(const Event *event_ptr) { + return GetEventDataFromEventImpl(event_ptr); +} + +void SymbolChangeEventData::DoOnRemoval(Event *event_ptr) { + DebuggerSP debugger_sp(m_debugger_wp.lock()); + if (!debugger_sp) + return; + + for (TargetSP target_sp : debugger_sp->GetTargetList().Targets()) { + if (ModuleSP module_sp = + target_sp->GetImages().FindModule(m_module_spec.GetUUID())) { + { + std::lock_guard guard(module_sp->GetMutex()); + if (!module_sp->GetSymbolFileFileSpec()) + module_sp->SetSymbolFileFileSpec(m_module_spec.GetSymbolFileSpec()); + } + ModuleList module_list; + module_list.Append(module_sp); + target_sp->SymbolsDidLoad(module_list); + } + } +} diff --git a/lldb/source/Core/FormatEntity.cpp b/lldb/source/Core/FormatEntity.cpp --- a/lldb/source/Core/FormatEntity.cpp +++ 
b/lldb/source/Core/FormatEntity.cpp @@ -711,9 +711,6 @@ return false; } - if (valobj == nullptr) - return false; - ValueObject::ExpressionPathAftermath what_next = (do_deref_pointer ? ValueObject::eExpressionPathAftermathDereference : ValueObject::eExpressionPathAftermathNothing); @@ -1695,7 +1692,7 @@ llvm::StringRef var_representation; const char *var_name = var_value_sp->GetName().GetCString(); if (var_value_sp->GetCompilerType().IsValid()) { - if (var_value_sp && exe_scope->CalculateTarget()) + if (exe_scope && exe_scope->CalculateTarget()) var_value_sp = var_value_sp->GetQualifiedRepresentationIfAvailable( exe_scope->CalculateTarget() diff --git a/lldb/source/Core/Module.cpp b/lldb/source/Core/Module.cpp --- a/lldb/source/Core/Module.cpp +++ b/lldb/source/Core/Module.cpp @@ -24,6 +24,7 @@ #include "lldb/Interpreter/ScriptInterpreter.h" #include "lldb/Symbol/CompileUnit.h" #include "lldb/Symbol/Function.h" +#include "lldb/Symbol/LocateSymbolFile.h" #include "lldb/Symbol/ObjectFile.h" #include "lldb/Symbol/Symbol.h" #include "lldb/Symbol/SymbolContext.h" @@ -770,7 +771,7 @@ while (i < sc_list.GetSize()) { if (!sc_list.GetContextAtIndex(i, sc)) break; - + bool keep_it = NameMatchesLookupInfo(sc.GetFunctionName(), sc.GetLanguage()); if (keep_it) @@ -1317,8 +1318,11 @@ } UnwindTable &Module::GetUnwindTable() { - if (!m_unwind_table) + if (!m_unwind_table) { m_unwind_table.emplace(*this); + if (!m_symfile_spec) + Symbols::DownloadSymbolFileAsync(GetUUID()); + } return *m_unwind_table; } diff --git a/lldb/source/Core/ModuleList.cpp b/lldb/source/Core/ModuleList.cpp --- a/lldb/source/Core/ModuleList.cpp +++ b/lldb/source/Core/ModuleList.cpp @@ -106,6 +106,12 @@ nullptr, ePropertyEnableExternalLookup, new_value); } +bool ModuleListProperties::GetEnableBackgroundLookup() const { + const uint32_t idx = ePropertyEnableBackgroundLookup; + return m_collection_sp->GetPropertyAtIndexAsBoolean( + nullptr, idx, g_modulelist_properties[idx].default_uint_value != 0); +} + 
FileSpec ModuleListProperties::GetClangModulesCachePath() const { return m_collection_sp ->GetPropertyAtIndexAsOptionValueFileSpec(nullptr, false, @@ -768,6 +774,10 @@ GetSharedModuleList().FindModules(module_spec, matching_module_list); } +lldb::ModuleSP ModuleList::FindSharedModule(const UUID &uuid) { + return GetSharedModuleList().FindModule(uuid); +} + size_t ModuleList::RemoveOrphanSharedModules(bool mandatory) { return GetSharedModuleList().RemoveOrphans(mandatory); } diff --git a/lldb/source/Host/common/Editline.cpp b/lldb/source/Host/common/Editline.cpp --- a/lldb/source/Host/common/Editline.cpp +++ b/lldb/source/Host/common/Editline.cpp @@ -1609,7 +1609,7 @@ switch (cvt.in(state, input.begin(), input.end(), from_next, &out, &out + 1, to_next)) { case std::codecvt_base::ok: - return out != (int)WEOF; + return out != (EditLineGetCharType)WEOF; case std::codecvt_base::error: case std::codecvt_base::noconv: diff --git a/lldb/source/Plugins/Instruction/ARM/EmulationStateARM.h b/lldb/source/Plugins/Instruction/ARM/EmulationStateARM.h --- a/lldb/source/Plugins/Instruction/ARM/EmulationStateARM.h +++ b/lldb/source/Plugins/Instruction/ARM/EmulationStateARM.h @@ -32,8 +32,6 @@ void ClearPseudoMemory(); - bool LoadPseudoRegistersFromFrame(lldb_private::StackFrame &frame); - bool LoadStateFromDictionary(lldb_private::OptionValueDictionary *test_data); bool CompareState(EmulationStateARM &other_state, diff --git a/lldb/source/Plugins/Instruction/ARM/EmulationStateARM.cpp b/lldb/source/Plugins/Instruction/ARM/EmulationStateARM.cpp --- a/lldb/source/Plugins/Instruction/ARM/EmulationStateARM.cpp +++ b/lldb/source/Plugins/Instruction/ARM/EmulationStateARM.cpp @@ -26,43 +26,6 @@ EmulationStateARM::~EmulationStateARM() = default; -bool EmulationStateARM::LoadPseudoRegistersFromFrame(StackFrame &frame) { - RegisterContext *reg_ctx = frame.GetRegisterContext().get(); - bool success = true; - uint32_t reg_num; - - for (int i = dwarf_r0; i < dwarf_r0 + 17; ++i) { - reg_num = - 
reg_ctx->ConvertRegisterKindToRegisterNumber(eRegisterKindDWARF, i); - const RegisterInfo *reg_info = reg_ctx->GetRegisterInfoAtIndex(reg_num); - RegisterValue reg_value; - if (reg_ctx->ReadRegister(reg_info, reg_value)) { - m_gpr[i - dwarf_r0] = reg_value.GetAsUInt32(); - } else - success = false; - } - - for (int i = dwarf_d0; i < dwarf_d0 + 32; ++i) { - reg_num = - reg_ctx->ConvertRegisterKindToRegisterNumber(eRegisterKindDWARF, i); - RegisterValue reg_value; - const RegisterInfo *reg_info = reg_ctx->GetRegisterInfoAtIndex(reg_num); - - if (reg_ctx->ReadRegister(reg_info, reg_value)) { - uint64_t value = reg_value.GetAsUInt64(); - uint32_t idx = i - dwarf_d0; - if (idx < 16) { - m_vfp_regs.s_regs[idx * 2] = (uint32_t)value; - m_vfp_regs.s_regs[idx * 2 + 1] = (uint32_t)(value >> 32); - } else - m_vfp_regs.d_regs[idx - 16] = value; - } else - success = false; - } - - return success; -} - bool EmulationStateARM::StorePseudoRegisterValue(uint32_t reg_num, uint64_t value) { if (reg_num <= dwarf_cpsr) diff --git a/lldb/source/Plugins/Instruction/CMakeLists.txt b/lldb/source/Plugins/Instruction/CMakeLists.txt --- a/lldb/source/Plugins/Instruction/CMakeLists.txt +++ b/lldb/source/Plugins/Instruction/CMakeLists.txt @@ -3,3 +3,4 @@ add_subdirectory(MIPS) add_subdirectory(MIPS64) add_subdirectory(PPC64) +add_subdirectory(RISCV) diff --git a/lldb/source/Plugins/Instruction/RISCV/CMakeLists.txt b/lldb/source/Plugins/Instruction/RISCV/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/lldb/source/Plugins/Instruction/RISCV/CMakeLists.txt @@ -0,0 +1,11 @@ +add_lldb_library(lldbPluginInstructionRISCV PLUGIN + EmulateInstructionRISCV.cpp + + LINK_LIBS + lldbCore + lldbInterpreter + lldbSymbol + lldbPluginProcessUtility + LINK_COMPONENTS + Support + ) diff --git a/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.h b/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.h new file mode 100644 --- /dev/null +++ 
b/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.h @@ -0,0 +1,71 @@ +//===-- EmulateInstructionRISCV.h -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_SOURCE_PLUGINS_INSTRUCTION_RISCV_EMULATEINSTRUCTIONRISCV_H +#define LLDB_SOURCE_PLUGINS_INSTRUCTION_RISCV_EMULATEINSTRUCTIONRISCV_H + +#include "lldb/Core/EmulateInstruction.h" +#include "lldb/Interpreter/OptionValue.h" +#include "lldb/Utility/Log.h" +#include "lldb/Utility/Status.h" + +namespace lldb_private { + +class EmulateInstructionRISCV : public EmulateInstruction { +public: + static llvm::StringRef GetPluginNameStatic() { return "riscv"; } + + static llvm::StringRef GetPluginDescriptionStatic() { + return "Emulate instructions for the RISC-V architecture."; + } + + static bool SupportsThisInstructionType(InstructionType inst_type) { + switch (inst_type) { + case eInstructionTypeAny: + case eInstructionTypePCModifying: + return true; + case eInstructionTypePrologueEpilogue: + case eInstructionTypeAll: + return false; + } + } + + static bool SupportsThisArch(const ArchSpec &arch); + + static lldb_private::EmulateInstruction * + CreateInstance(const lldb_private::ArchSpec &arch, InstructionType inst_type); + + static void Initialize(); + + static void Terminate(); + +public: + EmulateInstructionRISCV(const ArchSpec &arch) : EmulateInstruction(arch) {} + + llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); } + + bool SupportsEmulatingInstructionsOfType(InstructionType inst_type) override { + return SupportsThisInstructionType(inst_type); + } + + bool SetTargetTriple(const ArchSpec &arch) override; + bool ReadInstruction() override; + bool EvaluateInstruction(uint32_t 
options) override; + bool TestEmulation(Stream *out_stream, ArchSpec &arch, + OptionValueDictionary *test_data) override; + bool GetRegisterInfo(lldb::RegisterKind reg_kind, uint32_t reg_num, + RegisterInfo ®_info) override; + + lldb::addr_t ReadPC(bool *success); + bool WritePC(lldb::addr_t pc); + bool DecodeAndExecute(uint32_t inst, bool ignore_cond); +}; + +} // namespace lldb_private + +#endif // LLDB_SOURCE_PLUGINS_INSTRUCTION_RISCV_EMULATEINSTRUCTIONRISCV_H diff --git a/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.cpp b/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.cpp new file mode 100644 --- /dev/null +++ b/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.cpp @@ -0,0 +1,355 @@ +//===-- EmulateInstructionRISCV.cpp ---------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include "EmulateInstructionRISCV.h" +#include "Plugins/Process/Utility/RegisterInfoPOSIX_riscv64.h" +#include "Plugins/Process/Utility/lldb-riscv-register-enums.h" + +#include "lldb/Core/Address.h" +#include "lldb/Core/PluginManager.h" +#include "lldb/Interpreter/OptionValueArray.h" +#include "lldb/Interpreter/OptionValueDictionary.h" +#include "lldb/Symbol/UnwindPlan.h" +#include "lldb/Utility/ArchSpec.h" +#include "lldb/Utility/LLDBLog.h" +#include "lldb/Utility/RegisterValue.h" +#include "lldb/Utility/Stream.h" + +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/MathExtras.h" + +using namespace lldb; +using namespace lldb_private; + +LLDB_PLUGIN_DEFINE_ADV(EmulateInstructionRISCV, InstructionRISCV) + +namespace lldb_private { + +// Masks for detecting instructions types. According to riscv-spec Chap 26. 
+constexpr uint32_t I_MASK = 0b111000001111111; +constexpr uint32_t J_MASK = 0b000000001111111; +// no funct3 in the b-mask because the logic executing B is quite similar. +constexpr uint32_t B_MASK = 0b000000001111111; + +// The funct3 is the type of compare in B instructions. +// funct3 means "3-bits function selector", which RISC-V ISA uses as minor +// opcode. It reuses the major opcode encoding space. +constexpr uint32_t BEQ = 0b000; +constexpr uint32_t BNE = 0b001; +constexpr uint32_t BLT = 0b100; +constexpr uint32_t BGE = 0b101; +constexpr uint32_t BLTU = 0b110; +constexpr uint32_t BGEU = 0b111; + +constexpr uint32_t DecodeRD(uint32_t inst) { return (inst & 0xF80) >> 7; } +constexpr uint32_t DecodeRS1(uint32_t inst) { return (inst & 0xF8000) >> 15; } +constexpr uint32_t DecodeRS2(uint32_t inst) { return (inst & 0x1F00000) >> 20; } +constexpr uint32_t DecodeFunct3(uint32_t inst) { return (inst & 0x7000) >> 12; } + +constexpr int32_t SignExt(uint32_t imm) { return int32_t(imm); } + +constexpr uint32_t DecodeJImm(uint32_t inst) { + return (uint64_t(int64_t(int32_t(inst & 0x80000000)) >> 11)) // imm[20] + | (inst & 0xff000) // imm[19:12] + | ((inst >> 9) & 0x800) // imm[11] + | ((inst >> 20) & 0x7fe); // imm[10:1] +} + +constexpr uint32_t DecodeIImm(uint32_t inst) { + return int64_t(int32_t(inst)) >> 20; // imm[11:0] +} + +constexpr uint32_t DecodeBImm(uint32_t inst) { + return (uint64_t(int64_t(int32_t(inst & 0x80000000)) >> 19)) // imm[12] + | ((inst & 0x80) << 4) // imm[11] + | ((inst >> 20) & 0x7e0) // imm[10:5] + | ((inst >> 7) & 0x1e); // imm[4:1] +} + +static uint32_t GPREncodingToLLDB(uint32_t reg_encode) { + if (reg_encode == 0) + return gpr_x0_riscv; + if (reg_encode >= 1 && reg_encode <= 31) + return gpr_x1_riscv + reg_encode - 1; + return LLDB_INVALID_REGNUM; +} + +static bool ReadRegister(EmulateInstructionRISCV *emulator, uint32_t reg_encode, + RegisterValue &value) { + uint32_t lldb_reg = GPREncodingToLLDB(reg_encode); + return 
emulator->ReadRegister(eRegisterKindLLDB, lldb_reg, value); +} + +static bool WriteRegister(EmulateInstructionRISCV *emulator, + uint32_t reg_encode, const RegisterValue &value) { + uint32_t lldb_reg = GPREncodingToLLDB(reg_encode); + EmulateInstruction::Context ctx; + ctx.type = EmulateInstruction::eContextRegisterStore; + ctx.SetNoArgs(); + return emulator->WriteRegister(ctx, eRegisterKindLLDB, lldb_reg, value); +} + +static bool ExecJAL(EmulateInstructionRISCV *emulator, uint32_t inst, bool) { + bool success = false; + int64_t offset = SignExt(DecodeJImm(inst)); + int64_t pc = emulator->ReadPC(&success); + return success && emulator->WritePC(pc + offset) && + WriteRegister(emulator, DecodeRD(inst), + RegisterValue(uint64_t(pc + 4))); +} + +static bool ExecJALR(EmulateInstructionRISCV *emulator, uint32_t inst, bool) { + int64_t offset = SignExt(DecodeIImm(inst)); + RegisterValue value; + if (!ReadRegister(emulator, DecodeRS1(inst), value)) + return false; + bool success = false; + int64_t pc = emulator->ReadPC(&success); + int64_t rs1 = int64_t(value.GetAsUInt64()); + // JALR clears the bottom bit. According to riscv-spec: + // "The JALR instruction now clears the lowest bit of the calculated target + // address, to simplify hardware and to allow auxiliary information to be + // stored in function pointers." 
+ return emulator->WritePC((rs1 + offset) & ~1) && + WriteRegister(emulator, DecodeRD(inst), + RegisterValue(uint64_t(pc + 4))); +} + +static bool CompareB(uint64_t rs1, uint64_t rs2, uint32_t funct3) { + switch (funct3) { + case BEQ: + return rs1 == rs2; + case BNE: + return rs1 != rs2; + case BLT: + return int64_t(rs1) < int64_t(rs2); + case BGE: + return int64_t(rs1) >= int64_t(rs2); + case BLTU: + return rs1 < rs2; + case BGEU: + return rs1 >= rs2; + default: + llvm_unreachable("unexpected funct3"); + } +} + +static bool ExecB(EmulateInstructionRISCV *emulator, uint32_t inst, + bool ignore_cond) { + bool success = false; + uint64_t pc = emulator->ReadPC(&success); + if (!success) + return false; + + uint64_t offset = SignExt(DecodeBImm(inst)); + uint64_t target = pc + offset; + if (ignore_cond) + return emulator->WritePC(target); + + RegisterValue value1; + RegisterValue value2; + if (!ReadRegister(emulator, DecodeRS1(inst), value1) || + !ReadRegister(emulator, DecodeRS2(inst), value2)) + return false; + + uint32_t funct3 = DecodeFunct3(inst); + if (CompareB(value1.GetAsUInt64(), value2.GetAsUInt64(), funct3)) + return emulator->WritePC(target); + + return true; +} + +struct InstrPattern { + const char *name; + /// Bit mask to check the type of a instruction (B-Type, I-Type, J-Type, etc.) + uint32_t type_mask; + /// Characteristic value after bitwise-and with type_mask. 
+ uint32_t eigen; + bool (*exec)(EmulateInstructionRISCV *emulator, uint32_t inst, + bool ignore_cond); +}; + +static InstrPattern PATTERNS[] = { + {"JAL", J_MASK, 0b1101111, ExecJAL}, + {"JALR", I_MASK, 0b000000001100111, ExecJALR}, + {"B", B_MASK, 0b1100011, ExecB}, + // TODO: {LR/SC}.{W/D} and ECALL +}; + +/// This function only determines the next instruction address for software +/// sigle stepping by emulating branching instructions including: +/// - from Base Instruction Set : JAL, JALR, B, ECALL +/// - from Atomic Instruction Set: LR -> BNE -> SC -> BNE +/// We will get rid of this tedious code when the riscv debug spec is ratified. +bool EmulateInstructionRISCV::DecodeAndExecute(uint32_t inst, + bool ignore_cond) { + Log *log = GetLog(LLDBLog::Process | LLDBLog::Breakpoints); + for (const InstrPattern &pat : PATTERNS) { + if ((inst & pat.type_mask) == pat.eigen) { + LLDB_LOGF(log, "EmulateInstructionRISCV::%s: inst(%x) was decoded to %s", + __FUNCTION__, inst, pat.name); + return pat.exec(this, inst, ignore_cond); + } + } + + LLDB_LOGF(log, + "EmulateInstructionRISCV::%s: inst(0x%x) does not branch: " + "no need to calculate the next pc address which is trivial.", + __FUNCTION__, inst); + return true; +} + +bool EmulateInstructionRISCV::EvaluateInstruction(uint32_t options) { + uint32_t inst_size = m_opcode.GetByteSize(); + uint32_t inst = m_opcode.GetOpcode32(); + bool increase_pc = options & eEmulateInstructionOptionAutoAdvancePC; + bool ignore_cond = options & eEmulateInstructionOptionIgnoreConditions; + bool success = false; + + lldb::addr_t old_pc = 0; + if (increase_pc) { + old_pc = ReadPC(&success); + if (!success) + return false; + } + + if (inst_size == 2) { + // TODO: execute RVC + return false; + } + + success = DecodeAndExecute(inst, ignore_cond); + if (!success) + return false; + + if (increase_pc) { + lldb::addr_t new_pc = ReadPC(&success); + if (!success) + return false; + + if (new_pc == old_pc) { + if (!WritePC(old_pc + inst_size)) + 
return false; + } + } + return true; +} + +bool EmulateInstructionRISCV::ReadInstruction() { + bool success = false; + m_addr = ReadPC(&success); + if (!success) { + m_addr = LLDB_INVALID_ADDRESS; + return false; + } + + Context ctx; + ctx.type = eContextReadOpcode; + ctx.SetNoArgs(); + uint32_t inst = (uint32_t)ReadMemoryUnsigned(ctx, m_addr, 4, 0, &success); + uint16_t try_rvc = (uint16_t)(inst & 0x0000ffff); + // check whether the compressed encode could be valid + uint16_t mask = try_rvc & 0b11; + if (try_rvc != 0 && mask != 3) { + m_opcode.SetOpcode16(try_rvc, GetByteOrder()); + } else { + m_opcode.SetOpcode32(inst, GetByteOrder()); + } + + return true; +} + +lldb::addr_t EmulateInstructionRISCV::ReadPC(bool *success) { + return ReadRegisterUnsigned(eRegisterKindGeneric, LLDB_REGNUM_GENERIC_PC, + LLDB_INVALID_ADDRESS, success); +} + +bool EmulateInstructionRISCV::WritePC(lldb::addr_t pc) { + EmulateInstruction::Context ctx; + ctx.type = eContextAdvancePC; + ctx.SetNoArgs(); + return WriteRegisterUnsigned(ctx, eRegisterKindGeneric, + LLDB_REGNUM_GENERIC_PC, pc); +} + +bool EmulateInstructionRISCV::GetRegisterInfo(lldb::RegisterKind reg_kind, + uint32_t reg_index, + RegisterInfo ®_info) { + if (reg_kind == eRegisterKindGeneric) { + switch (reg_index) { + case LLDB_REGNUM_GENERIC_PC: + reg_kind = eRegisterKindLLDB; + reg_index = gpr_pc_riscv; + break; + case LLDB_REGNUM_GENERIC_SP: + reg_kind = eRegisterKindLLDB; + reg_index = gpr_sp_riscv; + break; + case LLDB_REGNUM_GENERIC_FP: + reg_kind = eRegisterKindLLDB; + reg_index = gpr_fp_riscv; + break; + case LLDB_REGNUM_GENERIC_RA: + reg_kind = eRegisterKindLLDB; + reg_index = gpr_ra_riscv; + break; + // We may handle LLDB_REGNUM_GENERIC_ARGx when more instructions are + // supported. 
+ default: + llvm_unreachable("unsupported register"); + } + } + + const RegisterInfo *array = + RegisterInfoPOSIX_riscv64::GetRegisterInfoPtr(m_arch); + const uint32_t length = + RegisterInfoPOSIX_riscv64::GetRegisterInfoCount(m_arch); + + if (reg_index >= length || reg_kind != eRegisterKindLLDB) + return false; + + reg_info = array[reg_index]; + return true; +} + +bool EmulateInstructionRISCV::SetTargetTriple(const ArchSpec &arch) { + return SupportsThisArch(arch); +} + +bool EmulateInstructionRISCV::TestEmulation(Stream *out_stream, ArchSpec &arch, + OptionValueDictionary *test_data) { + return false; +} + +void EmulateInstructionRISCV::Initialize() { + PluginManager::RegisterPlugin(GetPluginNameStatic(), + GetPluginDescriptionStatic(), CreateInstance); +} + +void EmulateInstructionRISCV::Terminate() { + PluginManager::UnregisterPlugin(CreateInstance); +} + +lldb_private::EmulateInstruction * +EmulateInstructionRISCV::CreateInstance(const ArchSpec &arch, + InstructionType inst_type) { + if (EmulateInstructionRISCV::SupportsThisInstructionType(inst_type) && + SupportsThisArch(arch)) { + return new EmulateInstructionRISCV(arch); + } + + return nullptr; +} + +bool EmulateInstructionRISCV::SupportsThisArch(const ArchSpec &arch) { + return arch.GetTriple().isRISCV(); +} + +} // namespace lldb_private diff --git a/lldb/source/Plugins/InstrumentationRuntime/TSan/InstrumentationRuntimeTSan.cpp b/lldb/source/Plugins/InstrumentationRuntime/TSan/InstrumentationRuntimeTSan.cpp --- a/lldb/source/Plugins/InstrumentationRuntime/TSan/InstrumentationRuntimeTSan.cpp +++ b/lldb/source/Plugins/InstrumentationRuntime/TSan/InstrumentationRuntimeTSan.cpp @@ -206,10 +206,10 @@ t; )"; -static StructuredData::Array * +static StructuredData::ArraySP CreateStackTrace(ValueObjectSP o, const std::string &trace_item_name = ".trace") { - StructuredData::Array *trace = new StructuredData::Array(); + auto trace_sp = std::make_shared(); ValueObjectSP trace_value_object = 
o->GetValueForExpressionPath(trace_item_name.c_str()); size_t count = trace_value_object->GetNumChildren(); @@ -218,18 +218,18 @@ trace_value_object->GetChildAtIndex(j, true)->GetValueAsUnsigned(0); if (trace_addr == 0) break; - trace->AddItem( - StructuredData::ObjectSP(new StructuredData::Integer(trace_addr))); + trace_sp->AddItem(std::make_shared(trace_addr)); } - return trace; + return trace_sp; } -static StructuredData::Array *ConvertToStructuredArray( +static StructuredData::ArraySP ConvertToStructuredArray( ValueObjectSP return_value_sp, const std::string &items_name, const std::string &count_name, - std::function const + std::function const &callback) { - StructuredData::Array *array = new StructuredData::Array(); + auto array_sp = std::make_shared(); unsigned int count = return_value_sp->GetValueForExpressionPath(count_name.c_str()) ->GetValueAsUnsigned(0); @@ -237,13 +237,13 @@ return_value_sp->GetValueForExpressionPath(items_name.c_str()); for (unsigned int i = 0; i < count; i++) { ValueObjectSP o = objects->GetChildAtIndex(i, true); - StructuredData::Dictionary *dict = new StructuredData::Dictionary(); + auto dict_sp = std::make_shared(); - callback(o, dict); + callback(o, dict_sp); - array->AddItem(StructuredData::ObjectSP(dict)); + array_sp->AddItem(dict_sp); } - return array; + return array_sp; } static std::string RetrieveString(ValueObjectSP return_value_sp, @@ -263,8 +263,8 @@ std::map &thread_id_map) { ConvertToStructuredArray( data, ".threads", ".thread_count", - [process_sp, &thread_id_map](ValueObjectSP o, - StructuredData::Dictionary *dict) { + [process_sp, &thread_id_map](const ValueObjectSP &o, + const StructuredData::DictionarySP &dict) { uint64_t thread_id = o->GetValueForExpressionPath(".tid")->GetValueAsUnsigned(0); uint64_t thread_os_id = @@ -338,31 +338,33 @@ std::map thread_id_map; GetRenumberedThreadIds(process_sp, main_value, thread_id_map); - StructuredData::Dictionary *dict = new StructuredData::Dictionary(); + auto dict = 
std::make_shared(); dict->AddStringItem("instrumentation_class", "ThreadSanitizer"); dict->AddStringItem("issue_type", RetrieveString(main_value, process_sp, ".description")); dict->AddIntegerItem("report_count", main_value->GetValueForExpressionPath(".report_count") ->GetValueAsUnsigned(0)); - dict->AddItem("sleep_trace", StructuredData::ObjectSP(CreateStackTrace( - main_value, ".sleep_trace"))); + dict->AddItem("sleep_trace", CreateStackTrace( + main_value, ".sleep_trace")); - StructuredData::Array *stacks = ConvertToStructuredArray( + StructuredData::ArraySP stacks = ConvertToStructuredArray( main_value, ".stacks", ".stack_count", - [thread_sp](ValueObjectSP o, StructuredData::Dictionary *dict) { + [thread_sp](const ValueObjectSP &o, + const StructuredData::DictionarySP &dict) { dict->AddIntegerItem( "index", o->GetValueForExpressionPath(".idx")->GetValueAsUnsigned(0)); - dict->AddItem("trace", StructuredData::ObjectSP(CreateStackTrace(o))); + dict->AddItem("trace", CreateStackTrace(o)); // "stacks" happen on the current thread dict->AddIntegerItem("thread_id", thread_sp->GetIndexID()); }); - dict->AddItem("stacks", StructuredData::ObjectSP(stacks)); + dict->AddItem("stacks", stacks); - StructuredData::Array *mops = ConvertToStructuredArray( + StructuredData::ArraySP mops = ConvertToStructuredArray( main_value, ".mops", ".mop_count", - [&thread_id_map](ValueObjectSP o, StructuredData::Dictionary *dict) { + [&thread_id_map](const ValueObjectSP &o, + const StructuredData::DictionarySP &dict) { dict->AddIntegerItem( "index", o->GetValueForExpressionPath(".idx")->GetValueAsUnsigned(0)); @@ -383,14 +385,14 @@ dict->AddIntegerItem( "address", o->GetValueForExpressionPath(".addr")->GetValueAsUnsigned(0)); - dict->AddItem("trace", StructuredData::ObjectSP(CreateStackTrace(o))); + dict->AddItem("trace", CreateStackTrace(o)); }); - dict->AddItem("mops", StructuredData::ObjectSP(mops)); + dict->AddItem("mops", mops); - StructuredData::Array *locs = 
ConvertToStructuredArray( + StructuredData::ArraySP locs = ConvertToStructuredArray( main_value, ".locs", ".loc_count", - [process_sp, &thread_id_map](ValueObjectSP o, - StructuredData::Dictionary *dict) { + [process_sp, &thread_id_map](const ValueObjectSP &o, + const StructuredData::DictionarySP &dict) { dict->AddIntegerItem( "index", o->GetValueForExpressionPath(".idx")->GetValueAsUnsigned(0)); @@ -415,15 +417,15 @@ dict->AddIntegerItem("suppressable", o->GetValueForExpressionPath(".suppressable") ->GetValueAsUnsigned(0)); - dict->AddItem("trace", StructuredData::ObjectSP(CreateStackTrace(o))); + dict->AddItem("trace", CreateStackTrace(o)); dict->AddStringItem("object_type", RetrieveString(o, process_sp, ".object_type")); }); - dict->AddItem("locs", StructuredData::ObjectSP(locs)); + dict->AddItem("locs", locs); - StructuredData::Array *mutexes = ConvertToStructuredArray( + StructuredData::ArraySP mutexes = ConvertToStructuredArray( main_value, ".mutexes", ".mutex_count", - [](ValueObjectSP o, StructuredData::Dictionary *dict) { + [](const ValueObjectSP &o, const StructuredData::DictionarySP &dict) { dict->AddIntegerItem( "index", o->GetValueForExpressionPath(".idx")->GetValueAsUnsigned(0)); @@ -436,14 +438,14 @@ dict->AddIntegerItem( "destroyed", o->GetValueForExpressionPath(".destroyed")->GetValueAsUnsigned(0)); - dict->AddItem("trace", StructuredData::ObjectSP(CreateStackTrace(o))); + dict->AddItem("trace", CreateStackTrace(o)); }); - dict->AddItem("mutexes", StructuredData::ObjectSP(mutexes)); + dict->AddItem("mutexes", mutexes); - StructuredData::Array *threads = ConvertToStructuredArray( + StructuredData::ArraySP threads = ConvertToStructuredArray( main_value, ".threads", ".thread_count", - [process_sp, &thread_id_map](ValueObjectSP o, - StructuredData::Dictionary *dict) { + [process_sp, &thread_id_map](const ValueObjectSP &o, + const StructuredData::DictionarySP &dict) { dict->AddIntegerItem( "index", 
o->GetValueForExpressionPath(".idx")->GetValueAsUnsigned(0)); @@ -464,13 +466,14 @@ Renumber(o->GetValueForExpressionPath(".parent_tid") ->GetValueAsUnsigned(0), thread_id_map)); - dict->AddItem("trace", StructuredData::ObjectSP(CreateStackTrace(o))); + dict->AddItem("trace", CreateStackTrace(o)); }); - dict->AddItem("threads", StructuredData::ObjectSP(threads)); + dict->AddItem("threads", threads); - StructuredData::Array *unique_tids = ConvertToStructuredArray( + StructuredData::ArraySP unique_tids = ConvertToStructuredArray( main_value, ".unique_tids", ".unique_tid_count", - [&thread_id_map](ValueObjectSP o, StructuredData::Dictionary *dict) { + [&thread_id_map](const ValueObjectSP &o, + const StructuredData::DictionarySP &dict) { dict->AddIntegerItem( "index", o->GetValueForExpressionPath(".idx")->GetValueAsUnsigned(0)); @@ -480,9 +483,9 @@ o->GetValueForExpressionPath(".tid")->GetValueAsUnsigned(0), thread_id_map)); }); - dict->AddItem("unique_tids", StructuredData::ObjectSP(unique_tids)); + dict->AddItem("unique_tids", unique_tids); - return StructuredData::ObjectSP(dict); + return dict; } std::string @@ -1030,9 +1033,8 @@ o->GetObjectForDotSeparatedPath("thread_os_id"); tid_t tid = thread_id_obj ? 
thread_id_obj->GetIntegerValue() : 0; - HistoryThread *history_thread = - new HistoryThread(*process_sp, tid, pcs); - ThreadSP new_thread_sp(history_thread); + ThreadSP new_thread_sp = + std::make_shared(*process_sp, tid, pcs); new_thread_sp->SetName(GenerateThreadName(path, o, info).c_str()); // Save this in the Process' ExtendedThreadList so a strong pointer @@ -1047,8 +1049,8 @@ lldb::ThreadCollectionSP InstrumentationRuntimeTSan::GetBacktracesFromExtendedStopInfo( StructuredData::ObjectSP info) { - ThreadCollectionSP threads; - threads = std::make_shared(); + + ThreadCollectionSP threads = std::make_shared(); if (info->GetObjectForDotSeparatedPath("instrumentation_class") ->GetStringValue() != "ThreadSanitizer") diff --git a/lldb/source/Plugins/Process/Linux/NativeProcessLinux.cpp b/lldb/source/Plugins/Process/Linux/NativeProcessLinux.cpp --- a/lldb/source/Plugins/Process/Linux/NativeProcessLinux.cpp +++ b/lldb/source/Plugins/Process/Linux/NativeProcessLinux.cpp @@ -882,7 +882,8 @@ } bool NativeProcessLinux::SupportHardwareSingleStepping() const { - if (m_arch.GetMachine() == llvm::Triple::arm || m_arch.IsMIPS()) + if (m_arch.IsMIPS() || m_arch.GetMachine() == llvm::Triple::arm || + m_arch.GetTriple().isRISCV()) return false; return true; } @@ -933,8 +934,14 @@ case eStateStepping: { // Run the thread, possibly feeding it the signal. 
const int signo = action->signal; - ResumeThread(static_cast(*thread), action->state, - signo); + Status error = ResumeThread(static_cast(*thread), + action->state, signo); + if (error.Fail()) + return Status("NativeProcessLinux::%s: failed to resume thread " + "for pid %" PRIu64 ", tid %" PRIu64 ", error = %s", + __FUNCTION__, GetID(), thread->GetID(), + error.AsCString()); + break; } diff --git a/lldb/source/Plugins/Process/Utility/NativeProcessSoftwareSingleStep.cpp b/lldb/source/Plugins/Process/Utility/NativeProcessSoftwareSingleStep.cpp --- a/lldb/source/Plugins/Process/Utility/NativeProcessSoftwareSingleStep.cpp +++ b/lldb/source/Plugins/Process/Utility/NativeProcessSoftwareSingleStep.cpp @@ -128,8 +128,10 @@ auto pc_it = baton.m_register_values.find(reg_info_pc->kinds[eRegisterKindDWARF]); - auto flags_it = - baton.m_register_values.find(reg_info_flags->kinds[eRegisterKindDWARF]); + auto flags_it = reg_info_flags == nullptr + ? baton.m_register_values.end() + : baton.m_register_values.find( + reg_info_flags->kinds[eRegisterKindDWARF]); lldb::addr_t next_pc; lldb::addr_t next_flags; @@ -165,7 +167,8 @@ // Arm mode size_hint = 4; } - } else if (arch.IsMIPS() || arch.GetTriple().isPPC64()) + } else if (arch.IsMIPS() || arch.GetTriple().isPPC64() || + arch.GetTriple().isRISCV()) size_hint = 4; error = process.SetBreakpoint(next_pc, size_hint, /*hardware=*/false); diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp --- a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp @@ -501,6 +501,8 @@ if (isLocalVariableType(cvs.kind())) { clang::DeclContext *scope = GetParentDeclContext(id); + if (!scope) + return nullptr; clang::Decl *scope_decl = clang::Decl::castFromDeclContext(scope); PdbCompilandSymId scope_id = PdbSymUid(m_decl_to_status[scope_decl].uid).asCompilandSym(); @@ -1010,7 +1012,7 @@ PdbTypeSymId 
real_type_id{udt.Type, false}; clang::QualType qt = GetOrCreateType(real_type_id); - if (qt.isNull()) + if (qt.isNull() || !scope) return nullptr; std::string uname = std::string(DropNameScope(udt.Name)); @@ -1265,7 +1267,7 @@ lldbassert(false && "Invalid function id type!"); } clang::QualType func_qt = GetOrCreateType(func_ti); - if (func_qt.isNull()) + if (func_qt.isNull() || !parent) return nullptr; CompilerType func_ct = ToCompilerType(func_qt); uint32_t param_count = @@ -1280,6 +1282,8 @@ return llvm::dyn_cast(decl); clang::DeclContext *parent = GetParentDeclContext(PdbSymUid(func_id)); + if (!parent) + return nullptr; std::string context_name; if (clang::NamespaceDecl *ns = llvm::dyn_cast(parent)) { context_name = ns->getQualifiedNameAsString(); diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp --- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp @@ -345,10 +345,13 @@ // This is a function. It must be global. Creating the Function entry // for it automatically creates a block for it. FunctionSP func = GetOrCreateFunction(block_id, *comp_unit); - Block &block = func->GetBlock(false); - if (block.GetNumRanges() == 0) - block.AddRange(Block::Range(0, func->GetAddressRange().GetByteSize())); - return block; + if (func) { + Block &block = func->GetBlock(false); + if (block.GetNumRanges() == 0) + block.AddRange(Block::Range(0, func->GetAddressRange().GetByteSize())); + return block; + } + break; } case S_BLOCK32: { // This is a block. Its parent is either a function or another block. 
In @@ -1024,11 +1027,13 @@ continue; if (type == PDB_SymType::Function) { sc.function = GetOrCreateFunction(csid, *sc.comp_unit).get(); - Block &block = sc.function->GetBlock(true); - addr_t func_base = - sc.function->GetAddressRange().GetBaseAddress().GetFileAddress(); - addr_t offset = file_addr - func_base; - sc.block = block.FindInnermostBlockByOffset(offset); + if (sc.function) { + Block &block = sc.function->GetBlock(true); + addr_t func_base = + sc.function->GetAddressRange().GetBaseAddress().GetFileAddress(); + addr_t offset = file_addr - func_base; + sc.block = block.FindInnermostBlockByOffset(offset); + } } if (type == PDB_SymType::Block) { @@ -1713,6 +1718,8 @@ CompilandIndexItem *cii = m_index->compilands().GetCompiland(var_id.modi); CompUnitSP comp_unit_sp = GetOrCreateCompileUnit(*cii); TypeSP type_sp = GetOrCreateType(var_info.type); + if (!type_sp) + return nullptr; std::string name = var_info.name.str(); Declaration decl; SymbolFileTypeSP sftype = @@ -1908,6 +1915,8 @@ CompilerDeclContext SymbolFileNativePDB::GetDeclContextContainingUID(lldb::user_id_t uid) { clang::DeclContext *context = m_ast->GetParentDeclContext(PdbSymUid(uid)); + if (!context) + return CompilerDeclContext(); return m_ast->ToCompilerDeclContext(*context); } @@ -1929,6 +1938,8 @@ return nullptr; TypeSP type_sp = CreateAndCacheType(type_id); + if (!type_sp) + return nullptr; return &*type_sp; } diff --git a/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.cpp b/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.cpp --- a/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.cpp @@ -295,7 +295,9 @@ break; case ptev_overflow: // The CPU internal buffer had an overflow error and some instructions - // were lost. + // were lost. A OVF packet comes with an FUP packet (harcoded address) + // according to the documentation, so we'll continue seeing instructions + // after this event. 
m_decoded_thread.AppendError(IntelPTError(-pte_overflow)); break; default: diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h @@ -91,7 +91,7 @@ void SetOwningModule(OptionalClangModuleID id); /// \} }; - + /// A TypeSystem implementation based on Clang. /// /// This class uses a single clang::ASTContext as the backend for storing @@ -334,7 +334,7 @@ llvm::SmallVector names; llvm::SmallVector args; - + const char * pack_name = nullptr; std::unique_ptr packed_args; }; @@ -537,7 +537,7 @@ #ifndef NDEBUG bool Verify(lldb::opaque_compiler_type_t type) override; #endif - + bool IsArrayType(lldb::opaque_compiler_type_t type, CompilerType *element_type, uint64_t *size, bool *is_incomplete) override; @@ -810,16 +810,17 @@ const char *name, bool omit_empty_base_classes, std::vector &child_indexes) override; - size_t GetNumTemplateArguments(lldb::opaque_compiler_type_t type) override; + size_t GetNumTemplateArguments(lldb::opaque_compiler_type_t type, + bool expand_pack) override; lldb::TemplateArgumentKind - GetTemplateArgumentKind(lldb::opaque_compiler_type_t type, - size_t idx) override; + GetTemplateArgumentKind(lldb::opaque_compiler_type_t type, size_t idx, + bool expand_pack) override; CompilerType GetTypeTemplateArgument(lldb::opaque_compiler_type_t type, - size_t idx) override; + size_t idx, bool expand_pack) override; llvm::Optional - GetIntegralTemplateArgument(lldb::opaque_compiler_type_t type, - size_t idx) override; + GetIntegralTemplateArgument(lldb::opaque_compiler_type_t type, size_t idx, + bool expand_pack) override; CompilerType GetTypeForFormatters(void *type) override; diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ 
b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -7098,7 +7098,8 @@ } size_t -TypeSystemClang::GetNumTemplateArguments(lldb::opaque_compiler_type_t type) { +TypeSystemClang::GetNumTemplateArguments(lldb::opaque_compiler_type_t type, + bool expand_pack) { if (!type) return 0; @@ -7113,8 +7114,17 @@ const clang::ClassTemplateSpecializationDecl *template_decl = llvm::dyn_cast( cxx_record_decl); - if (template_decl) - return template_decl->getTemplateArgs().size(); + if (template_decl) { + const auto &template_arg_list = template_decl->getTemplateArgs(); + size_t num_args = template_arg_list.size(); + assert(num_args && "template specialization without any args"); + if (expand_pack && num_args) { + const auto &pack = template_arg_list[num_args - 1]; + if (pack.getKind() == clang::TemplateArgument::Pack) + num_args += pack.pack_size() - 1; + } + return num_args; + } } } break; @@ -7151,15 +7161,51 @@ } } +const TemplateArgument * +GetNthTemplateArgument(const clang::ClassTemplateSpecializationDecl *decl, + size_t idx, bool expand_pack) { + const auto &args = decl->getTemplateArgs(); + const size_t args_size = args.size(); + + assert(args_size && "template specialization without any args"); + if (!args_size) + return nullptr; + + const size_t last_idx = args_size - 1; + + // We're asked for a template argument that can't be a parameter pack, so + // return it without worrying about 'expand_pack'. + if (idx < last_idx) + return &args[idx]; + + // We're asked for the last template argument but we don't want/need to + // expand it. + if (!expand_pack || args[last_idx].getKind() != clang::TemplateArgument::Pack) + return idx >= args.size() ? nullptr : &args[idx]; + + // Index into the expanded pack. + // Note that 'idx' counts from the beginning of all template arguments + // (including the ones preceding the parameter pack). 
+ const auto &pack = args[last_idx]; + const size_t pack_idx = idx - last_idx; + const size_t pack_size = pack.pack_size(); + assert(pack_idx < pack_size && "parameter pack index out-of-bounds"); + return &pack.pack_elements()[pack_idx]; +} + lldb::TemplateArgumentKind TypeSystemClang::GetTemplateArgumentKind(lldb::opaque_compiler_type_t type, - size_t arg_idx) { + size_t arg_idx, bool expand_pack) { const clang::ClassTemplateSpecializationDecl *template_decl = GetAsTemplateSpecialization(type); - if (! template_decl || arg_idx >= template_decl->getTemplateArgs().size()) + if (!template_decl) + return eTemplateArgumentKindNull; + + const auto *arg = GetNthTemplateArgument(template_decl, arg_idx, expand_pack); + if (!arg) return eTemplateArgumentKindNull; - switch (template_decl->getTemplateArgs()[arg_idx].getKind()) { + switch (arg->getKind()) { case clang::TemplateArgument::Null: return eTemplateArgumentKindNull; @@ -7192,35 +7238,32 @@ CompilerType TypeSystemClang::GetTypeTemplateArgument(lldb::opaque_compiler_type_t type, - size_t idx) { + size_t idx, bool expand_pack) { const clang::ClassTemplateSpecializationDecl *template_decl = GetAsTemplateSpecialization(type); - if (!template_decl || idx >= template_decl->getTemplateArgs().size()) + if (!template_decl) return CompilerType(); - const clang::TemplateArgument &template_arg = - template_decl->getTemplateArgs()[idx]; - if (template_arg.getKind() != clang::TemplateArgument::Type) + const auto *arg = GetNthTemplateArgument(template_decl, idx, expand_pack); + if (!arg || arg->getKind() != clang::TemplateArgument::Type) return CompilerType(); - return GetType(template_arg.getAsType()); + return GetType(arg->getAsType()); } Optional TypeSystemClang::GetIntegralTemplateArgument(lldb::opaque_compiler_type_t type, - size_t idx) { + size_t idx, bool expand_pack) { const clang::ClassTemplateSpecializationDecl *template_decl = GetAsTemplateSpecialization(type); - if (! 
template_decl || idx >= template_decl->getTemplateArgs().size()) + if (!template_decl) return llvm::None; - const clang::TemplateArgument &template_arg = - template_decl->getTemplateArgs()[idx]; - if (template_arg.getKind() != clang::TemplateArgument::Integral) + const auto *arg = GetNthTemplateArgument(template_decl, idx, expand_pack); + if (!arg || arg->getKind() != clang::TemplateArgument::Integral) return llvm::None; - return { - {template_arg.getAsIntegral(), GetType(template_arg.getIntegralType())}}; + return {{arg->getAsIntegral(), GetType(arg->getIntegralType())}}; } CompilerType TypeSystemClang::GetTypeForFormatters(void *type) { diff --git a/lldb/source/Symbol/CompilerType.cpp b/lldb/source/Symbol/CompilerType.cpp --- a/lldb/source/Symbol/CompilerType.cpp +++ b/lldb/source/Symbol/CompilerType.cpp @@ -659,30 +659,32 @@ return 0; } -size_t CompilerType::GetNumTemplateArguments() const { +size_t CompilerType::GetNumTemplateArguments(bool expand_pack) const { if (IsValid()) { - return m_type_system->GetNumTemplateArguments(m_type); + return m_type_system->GetNumTemplateArguments(m_type, expand_pack); } return 0; } -TemplateArgumentKind CompilerType::GetTemplateArgumentKind(size_t idx) const { +TemplateArgumentKind +CompilerType::GetTemplateArgumentKind(size_t idx, bool expand_pack) const { if (IsValid()) - return m_type_system->GetTemplateArgumentKind(m_type, idx); + return m_type_system->GetTemplateArgumentKind(m_type, idx, expand_pack); return eTemplateArgumentKindNull; } -CompilerType CompilerType::GetTypeTemplateArgument(size_t idx) const { +CompilerType CompilerType::GetTypeTemplateArgument(size_t idx, + bool expand_pack) const { if (IsValid()) { - return m_type_system->GetTypeTemplateArgument(m_type, idx); + return m_type_system->GetTypeTemplateArgument(m_type, idx, expand_pack); } return CompilerType(); } llvm::Optional -CompilerType::GetIntegralTemplateArgument(size_t idx) const { +CompilerType::GetIntegralTemplateArgument(size_t idx, bool 
expand_pack) const { if (IsValid()) - return m_type_system->GetIntegralTemplateArgument(m_type, idx); + return m_type_system->GetIntegralTemplateArgument(m_type, idx, expand_pack); return llvm::None; } diff --git a/lldb/source/Symbol/LocateSymbolFile.cpp b/lldb/source/Symbol/LocateSymbolFile.cpp --- a/lldb/source/Symbol/LocateSymbolFile.cpp +++ b/lldb/source/Symbol/LocateSymbolFile.cpp @@ -8,6 +8,8 @@ #include "lldb/Symbol/LocateSymbolFile.h" +#include "lldb/Core/Debugger.h" +#include "lldb/Core/Module.h" #include "lldb/Core/ModuleList.h" #include "lldb/Core/ModuleSpec.h" #include "lldb/Core/Progress.h" @@ -23,7 +25,9 @@ #include "lldb/Utility/Timer.h" #include "lldb/Utility/UUID.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/ThreadPool.h" // From MacOSX system header "mach/machine.h" typedef int cpu_type_t; @@ -397,6 +401,35 @@ return LocateExecutableSymbolFileDsym(module_spec); } +void Symbols::DownloadSymbolFileAsync(const UUID &uuid) { + if (!ModuleList::GetGlobalModuleListProperties().GetEnableBackgroundLookup()) + return; + + static llvm::SmallSet g_seen_uuids; + static std::mutex g_mutex; + Debugger::GetThreadPool().async([=]() { + { + std::lock_guard guard(g_mutex); + if (g_seen_uuids.count(uuid)) + return; + g_seen_uuids.insert(uuid); + } + + Status error; + ModuleSpec module_spec; + module_spec.GetUUID() = uuid; + if (!Symbols::DownloadObjectAndSymbolFile(module_spec, error, + /*force_lookup=*/true, + /*copy_executable=*/false)) + return; + + if (error.Fail()) + return; + + Debugger::ReportSymbolChange(module_spec); + }); +} + #if !defined(__APPLE__) FileSpec Symbols::FindSymbolFileInBundle(const FileSpec &symfile_bundle, @@ -407,7 +440,8 @@ } bool Symbols::DownloadObjectAndSymbolFile(ModuleSpec &module_spec, - Status &error, bool force_lookup) { + Status &error, bool force_lookup, + bool copy_executable) { // Fill in the module_spec.GetFileSpec() for the object file and/or the // 
module_spec.GetSymbolFileSpec() for the debug symbols file. return false; diff --git a/lldb/source/Symbol/LocateSymbolFileMacOSX.cpp b/lldb/source/Symbol/LocateSymbolFileMacOSX.cpp --- a/lldb/source/Symbol/LocateSymbolFileMacOSX.cpp +++ b/lldb/source/Symbol/LocateSymbolFileMacOSX.cpp @@ -554,7 +554,8 @@ } bool Symbols::DownloadObjectAndSymbolFile(ModuleSpec &module_spec, - Status &error, bool force_lookup) { + Status &error, bool force_lookup, + bool copy_executable) { const UUID *uuid_ptr = module_spec.GetUUIDPtr(); const FileSpec *file_spec_ptr = module_spec.GetFileSpecPtr(); @@ -584,15 +585,18 @@ // Create the dsymForUUID command. StreamString command; + const char *copy_executable_arg = copy_executable ? "--copyExecutable " : ""; if (!uuid_str.empty()) { - command.Printf("%s --ignoreNegativeCache --copyExecutable %s", - dsymForUUID_exe_path.c_str(), uuid_str.c_str()); + command.Printf("%s --ignoreNegativeCache %s%s", + dsymForUUID_exe_path.c_str(), copy_executable_arg, + uuid_str.c_str()); LLDB_LOGF(log, "Calling %s with UUID %s to find dSYM: %s", dsymForUUID_exe_path.c_str(), uuid_str.c_str(), command.GetString().data()); } else if (!file_path_str.empty()) { - command.Printf("%s --ignoreNegativeCache --copyExecutable %s", - dsymForUUID_exe_path.c_str(), file_path_str.c_str()); + command.Printf("%s --ignoreNegativeCache %s%s", + dsymForUUID_exe_path.c_str(), copy_executable_arg, + file_path_str.c_str()); LLDB_LOGF(log, "Calling %s with file %s to find dSYM: %s", dsymForUUID_exe_path.c_str(), file_path_str.c_str(), command.GetString().data()); diff --git a/lldb/source/Symbol/TypeSystem.cpp b/lldb/source/Symbol/TypeSystem.cpp --- a/lldb/source/Symbol/TypeSystem.cpp +++ b/lldb/source/Symbol/TypeSystem.cpp @@ -118,23 +118,25 @@ return CompilerType(this, type); } -size_t TypeSystem::GetNumTemplateArguments(lldb::opaque_compiler_type_t type) { +size_t TypeSystem::GetNumTemplateArguments(lldb::opaque_compiler_type_t type, + bool expand_pack) { return 0; } 
TemplateArgumentKind -TypeSystem::GetTemplateArgumentKind(opaque_compiler_type_t type, size_t idx) { +TypeSystem::GetTemplateArgumentKind(opaque_compiler_type_t type, size_t idx, + bool expand_pack) { return eTemplateArgumentKindNull; } CompilerType TypeSystem::GetTypeTemplateArgument(opaque_compiler_type_t type, - size_t idx) { + size_t idx, bool expand_pack) { return CompilerType(); } llvm::Optional -TypeSystem::GetIntegralTemplateArgument(opaque_compiler_type_t type, - size_t idx) { +TypeSystem::GetIntegralTemplateArgument(opaque_compiler_type_t type, size_t idx, + bool expand_pack) { return llvm::None; } diff --git a/lldb/source/Target/Platform.cpp b/lldb/source/Target/Platform.cpp --- a/lldb/source/Target/Platform.cpp +++ b/lldb/source/Target/Platform.cpp @@ -2060,10 +2060,9 @@ // the same platform supports all architectures then that's the obvious next // best thing. if (candidates.size() == archs.size()) { - if (std::all_of(candidates.begin(), candidates.end(), - [&](const PlatformSP &p) -> bool { - return p->GetName() == candidates.front()->GetName(); - })) { + if (llvm::all_of(candidates, [&](const PlatformSP &p) -> bool { + return p->GetName() == candidates.front()->GetName(); + })) { return candidates.front(); } } diff --git a/lldb/source/Utility/Event.cpp b/lldb/source/Utility/Event.cpp --- a/lldb/source/Utility/Event.cpp +++ b/lldb/source/Utility/Event.cpp @@ -124,9 +124,7 @@ } void EventDataBytes::Dump(Stream *s) const { - size_t num_printable_chars = - std::count_if(m_bytes.begin(), m_bytes.end(), llvm::isPrint); - if (num_printable_chars == m_bytes.size()) + if (llvm::all_of(m_bytes, llvm::isPrint)) s->Format("\"{0}\"", m_bytes); else s->Format("{0:$[ ]@[x-2]}", llvm::make_range( diff --git a/lldb/test/API/api/check_public_api_headers/TestPublicAPIHeaders.py b/lldb/test/API/api/check_public_api_headers/TestPublicAPIHeaders.py --- a/lldb/test/API/api/check_public_api_headers/TestPublicAPIHeaders.py +++ 
b/lldb/test/API/api/check_public_api_headers/TestPublicAPIHeaders.py @@ -3,9 +3,6 @@ There should be nothing unwanted there and a simpe main.cpp which includes SB*.h should compile and link with the LLDB framework.""" -from __future__ import print_function - - from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * from lldbsuite.test import lldbutil diff --git a/lldb/test/API/api/command-return-object/TestSBCommandReturnObject.py b/lldb/test/API/api/command-return-object/TestSBCommandReturnObject.py --- a/lldb/test/API/api/command-return-object/TestSBCommandReturnObject.py +++ b/lldb/test/API/api/command-return-object/TestSBCommandReturnObject.py @@ -1,8 +1,5 @@ """Test the lldb public C++ api for returning SBCommandReturnObject.""" -from __future__ import print_function - - from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * from lldbsuite.test import lldbutil diff --git a/lldb/test/API/api/multiple-debuggers/TestMultipleDebuggers.py b/lldb/test/API/api/multiple-debuggers/TestMultipleDebuggers.py --- a/lldb/test/API/api/multiple-debuggers/TestMultipleDebuggers.py +++ b/lldb/test/API/api/multiple-debuggers/TestMultipleDebuggers.py @@ -1,8 +1,5 @@ """Test the lldb public C++ api when doing multiple debug sessions simultaneously.""" -from __future__ import print_function - - import os import lldb diff --git a/lldb/test/API/api/multiple-targets/TestMultipleTargets.py b/lldb/test/API/api/multiple-targets/TestMultipleTargets.py --- a/lldb/test/API/api/multiple-targets/TestMultipleTargets.py +++ b/lldb/test/API/api/multiple-targets/TestMultipleTargets.py @@ -1,8 +1,5 @@ """Test the lldb public C++ api when creating multiple targets simultaneously.""" -from __future__ import print_function - - import os import lldb diff --git a/lldb/test/API/api/multithreaded/TestMultithreaded.py b/lldb/test/API/api/multithreaded/TestMultithreaded.py --- a/lldb/test/API/api/multithreaded/TestMultithreaded.py +++ 
b/lldb/test/API/api/multithreaded/TestMultithreaded.py @@ -1,7 +1,5 @@ """Test the lldb public C++ api breakpoint callbacks.""" -from __future__ import print_function - # __package__ = "lldbsuite.test" diff --git a/lldb/test/API/arm/emulation/TestEmulations.py b/lldb/test/API/arm/emulation/TestEmulations.py --- a/lldb/test/API/arm/emulation/TestEmulations.py +++ b/lldb/test/API/arm/emulation/TestEmulations.py @@ -2,9 +2,6 @@ Test some ARM instruction emulation. """ -from __future__ import print_function - - import os import lldb from lldbsuite.test.decorators import * diff --git a/lldb/test/API/benchmarks/continue/TestBenchmarkContinue.py b/lldb/test/API/benchmarks/continue/TestBenchmarkContinue.py --- a/lldb/test/API/benchmarks/continue/TestBenchmarkContinue.py +++ b/lldb/test/API/benchmarks/continue/TestBenchmarkContinue.py @@ -2,9 +2,6 @@ Test lldb data formatter subsystem. """ -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbbench import * diff --git a/lldb/test/API/benchmarks/expression/TestExpressionCmd.py b/lldb/test/API/benchmarks/expression/TestExpressionCmd.py --- a/lldb/test/API/benchmarks/expression/TestExpressionCmd.py +++ b/lldb/test/API/benchmarks/expression/TestExpressionCmd.py @@ -1,8 +1,5 @@ """Test lldb's expression evaluations and collect statistics.""" -from __future__ import print_function - - import sys import lldb from lldbsuite.test.decorators import * diff --git a/lldb/test/API/benchmarks/expression/TestRepeatedExprs.py b/lldb/test/API/benchmarks/expression/TestRepeatedExprs.py --- a/lldb/test/API/benchmarks/expression/TestRepeatedExprs.py +++ b/lldb/test/API/benchmarks/expression/TestRepeatedExprs.py @@ -1,8 +1,5 @@ """Test evaluating expressions repeatedly comparing lldb against gdb.""" -from __future__ import print_function - - import sys import lldb from lldbsuite.test.lldbbench import BenchBase diff --git 
a/lldb/test/API/benchmarks/frame_variable/TestFrameVariableResponse.py b/lldb/test/API/benchmarks/frame_variable/TestFrameVariableResponse.py --- a/lldb/test/API/benchmarks/frame_variable/TestFrameVariableResponse.py +++ b/lldb/test/API/benchmarks/frame_variable/TestFrameVariableResponse.py @@ -1,8 +1,5 @@ """Test lldb's response time for 'frame variable' command.""" -from __future__ import print_function - - import sys import lldb from lldbsuite.test import configuration diff --git a/lldb/test/API/benchmarks/libcxxlist/TestBenchmarkLibcxxList.py b/lldb/test/API/benchmarks/libcxxlist/TestBenchmarkLibcxxList.py --- a/lldb/test/API/benchmarks/libcxxlist/TestBenchmarkLibcxxList.py +++ b/lldb/test/API/benchmarks/libcxxlist/TestBenchmarkLibcxxList.py @@ -2,9 +2,6 @@ Test lldb data formatter subsystem. """ -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbbench import * diff --git a/lldb/test/API/benchmarks/libcxxmap/TestBenchmarkLibcxxMap.py b/lldb/test/API/benchmarks/libcxxmap/TestBenchmarkLibcxxMap.py --- a/lldb/test/API/benchmarks/libcxxmap/TestBenchmarkLibcxxMap.py +++ b/lldb/test/API/benchmarks/libcxxmap/TestBenchmarkLibcxxMap.py @@ -2,9 +2,6 @@ Test lldb data formatter subsystem. 
""" -from __future__ import print_function - - import lldb from lldbsuite.test.lldbbench import * from lldbsuite.test.decorators import * diff --git a/lldb/test/API/benchmarks/startup/TestStartupDelays.py b/lldb/test/API/benchmarks/startup/TestStartupDelays.py --- a/lldb/test/API/benchmarks/startup/TestStartupDelays.py +++ b/lldb/test/API/benchmarks/startup/TestStartupDelays.py @@ -1,8 +1,5 @@ """Test lldb's startup delays creating a target, setting a breakpoint, and run to breakpoint stop.""" -from __future__ import print_function - - import sys import lldb from lldbsuite.test import configuration diff --git a/lldb/test/API/benchmarks/stepping/TestSteppingSpeed.py b/lldb/test/API/benchmarks/stepping/TestSteppingSpeed.py --- a/lldb/test/API/benchmarks/stepping/TestSteppingSpeed.py +++ b/lldb/test/API/benchmarks/stepping/TestSteppingSpeed.py @@ -1,7 +1,5 @@ """Test lldb's stepping speed.""" -from __future__ import print_function - import sys import lldb from lldbsuite.test import configuration diff --git a/lldb/test/API/benchmarks/turnaround/TestCompileRunToBreakpointTurnaround.py b/lldb/test/API/benchmarks/turnaround/TestCompileRunToBreakpointTurnaround.py --- a/lldb/test/API/benchmarks/turnaround/TestCompileRunToBreakpointTurnaround.py +++ b/lldb/test/API/benchmarks/turnaround/TestCompileRunToBreakpointTurnaround.py @@ -1,8 +1,5 @@ """Benchmark the turnaround time starting a debugger and run to the breakpoint with lldb vs. 
gdb.""" -from __future__ import print_function - - import sys import lldb from lldbsuite.test.lldbbench import * diff --git a/lldb/test/API/commands/command/container/welcome.py b/lldb/test/API/commands/command/container/welcome.py --- a/lldb/test/API/commands/command/container/welcome.py +++ b/lldb/test/API/commands/command/container/welcome.py @@ -1,4 +1,3 @@ -from __future__ import print_function import lldb import sys diff --git a/lldb/test/API/commands/command/script/decorated.py b/lldb/test/API/commands/command/script/decorated.py --- a/lldb/test/API/commands/command/script/decorated.py +++ b/lldb/test/API/commands/command/script/decorated.py @@ -1,5 +1,3 @@ -from __future__ import print_function - import lldb diff --git a/lldb/test/API/commands/command/script/import/bar/bar.py b/lldb/test/API/commands/command/script/import/bar/bar.py --- a/lldb/test/API/commands/command/script/import/bar/bar.py +++ b/lldb/test/API/commands/command/script/import/bar/bar.py @@ -1,6 +1,3 @@ -from __future__ import print_function - - def bar_function(debugger, args, result, dict): global UtilityModule print(UtilityModule.barutil_function("bar told me " + args), file=result) diff --git a/lldb/test/API/commands/command/script/import/foo/bar/foobar.py b/lldb/test/API/commands/command/script/import/foo/bar/foobar.py --- a/lldb/test/API/commands/command/script/import/foo/bar/foobar.py +++ b/lldb/test/API/commands/command/script/import/foo/bar/foobar.py @@ -1,6 +1,3 @@ -from __future__ import print_function - - def foo_function(debugger, args, result, dict): print("foobar says " + args, file=result) return None diff --git a/lldb/test/API/commands/command/script/import/foo/foo.py b/lldb/test/API/commands/command/script/import/foo/foo.py --- a/lldb/test/API/commands/command/script/import/foo/foo.py +++ b/lldb/test/API/commands/command/script/import/foo/foo.py @@ -1,6 +1,3 @@ -from __future__ import print_function - - def foo_function(debugger, args, result, dict): print("foo says " + 
args, file=result) return None diff --git a/lldb/test/API/commands/command/script/import/foo/foo2.py b/lldb/test/API/commands/command/script/import/foo/foo2.py --- a/lldb/test/API/commands/command/script/import/foo/foo2.py +++ b/lldb/test/API/commands/command/script/import/foo/foo2.py @@ -1,6 +1,3 @@ -from __future__ import print_function - - def foo2_function(debugger, args, result, dict): print("foo2 says " + args, file=result) return None diff --git a/lldb/test/API/commands/command/script/import/thepackage/__init__.py b/lldb/test/API/commands/command/script/import/thepackage/__init__.py --- a/lldb/test/API/commands/command/script/import/thepackage/__init__.py +++ b/lldb/test/API/commands/command/script/import/thepackage/__init__.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - from . import TPunitA from . import TPunitB diff --git a/lldb/test/API/commands/command/script/mysto.py b/lldb/test/API/commands/command/script/mysto.py --- a/lldb/test/API/commands/command/script/mysto.py +++ b/lldb/test/API/commands/command/script/mysto.py @@ -1,5 +1,3 @@ -from __future__ import print_function - import lldb diff --git a/lldb/test/API/commands/command/script/welcome.py b/lldb/test/API/commands/command/script/welcome.py --- a/lldb/test/API/commands/command/script/welcome.py +++ b/lldb/test/API/commands/command/script/welcome.py @@ -1,4 +1,3 @@ -from __future__ import print_function import lldb import sys diff --git a/lldb/test/API/commands/command/script_alias/tcsacmd.py b/lldb/test/API/commands/command/script_alias/tcsacmd.py --- a/lldb/test/API/commands/command/script_alias/tcsacmd.py +++ b/lldb/test/API/commands/command/script_alias/tcsacmd.py @@ -1,4 +1,3 @@ -from __future__ import print_function import lldb diff --git a/lldb/test/API/commands/command/source/my.py b/lldb/test/API/commands/command/source/my.py --- a/lldb/test/API/commands/command/source/my.py +++ b/lldb/test/API/commands/command/source/my.py @@ -1,6 +1,3 @@ -from __future__ import 
print_function - - def date(): import datetime today = datetime.date.today() diff --git a/lldb/test/API/commands/expression/no-deadlock/TestExprDoesntBlock.py b/lldb/test/API/commands/expression/no-deadlock/TestExprDoesntBlock.py --- a/lldb/test/API/commands/expression/no-deadlock/TestExprDoesntBlock.py +++ b/lldb/test/API/commands/expression/no-deadlock/TestExprDoesntBlock.py @@ -2,9 +2,6 @@ Test that expr will time out and allow other threads to run if it blocks. """ -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/commands/process/launch/TestProcessLaunch.py b/lldb/test/API/commands/process/launch/TestProcessLaunch.py --- a/lldb/test/API/commands/process/launch/TestProcessLaunch.py +++ b/lldb/test/API/commands/process/launch/TestProcessLaunch.py @@ -2,8 +2,6 @@ Test lldb process launch flags. """ -from __future__ import print_function - import os import lldb diff --git a/lldb/test/API/commands/register/register/register_command/TestRegisters.py b/lldb/test/API/commands/register/register/register_command/TestRegisters.py --- a/lldb/test/API/commands/register/register/register_command/TestRegisters.py +++ b/lldb/test/API/commands/register/register/register_command/TestRegisters.py @@ -2,9 +2,6 @@ Test the 'register' command. """ -from __future__ import print_function - - import os import sys import lldb diff --git a/lldb/test/API/commands/watchpoints/multiple_threads/TestWatchpointMultipleThreads.py b/lldb/test/API/commands/watchpoints/multiple_threads/TestWatchpointMultipleThreads.py --- a/lldb/test/API/commands/watchpoints/multiple_threads/TestWatchpointMultipleThreads.py +++ b/lldb/test/API/commands/watchpoints/multiple_threads/TestWatchpointMultipleThreads.py @@ -2,9 +2,6 @@ Test that lldb watchpoint works for multiple threads. 
""" -from __future__ import print_function - - import re import lldb from lldbsuite.test.decorators import * diff --git a/lldb/test/API/commands/watchpoints/watchpoint_events/TestWatchpointEvents.py b/lldb/test/API/commands/watchpoints/watchpoint_events/TestWatchpointEvents.py --- a/lldb/test/API/commands/watchpoints/watchpoint_events/TestWatchpointEvents.py +++ b/lldb/test/API/commands/watchpoints/watchpoint_events/TestWatchpointEvents.py @@ -1,8 +1,5 @@ """Test that adding, deleting and modifying watchpoints sends the appropriate events.""" -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/functionalities/breakpoint/breakpoint_command/TestBreakpointCommandsFromPython.py b/lldb/test/API/functionalities/breakpoint/breakpoint_command/TestBreakpointCommandsFromPython.py --- a/lldb/test/API/functionalities/breakpoint/breakpoint_command/TestBreakpointCommandsFromPython.py +++ b/lldb/test/API/functionalities/breakpoint/breakpoint_command/TestBreakpointCommandsFromPython.py @@ -2,9 +2,6 @@ Test that you can set breakpoint commands successfully with the Python API's: """ -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/functionalities/breakpoint/breakpoint_command/bktptcmd.py b/lldb/test/API/functionalities/breakpoint/breakpoint_command/bktptcmd.py --- a/lldb/test/API/functionalities/breakpoint/breakpoint_command/bktptcmd.py +++ b/lldb/test/API/functionalities/breakpoint/breakpoint_command/bktptcmd.py @@ -1,4 +1,3 @@ -from __future__ import print_function import side_effect def useless_function(first, second): diff --git a/lldb/test/API/functionalities/breakpoint/breakpoint_in_delayslot/TestAvoidBreakpointInDelaySlot.py b/lldb/test/API/functionalities/breakpoint/breakpoint_in_delayslot/TestAvoidBreakpointInDelaySlot.py --- 
a/lldb/test/API/functionalities/breakpoint/breakpoint_in_delayslot/TestAvoidBreakpointInDelaySlot.py +++ b/lldb/test/API/functionalities/breakpoint/breakpoint_in_delayslot/TestAvoidBreakpointInDelaySlot.py @@ -2,8 +2,6 @@ Test specific to MIPS """ -from __future__ import print_function - import re import lldb from lldbsuite.test.decorators import * diff --git a/lldb/test/API/functionalities/breakpoint/cpp/TestCPPBreakpointLocations.py b/lldb/test/API/functionalities/breakpoint/cpp/TestCPPBreakpointLocations.py --- a/lldb/test/API/functionalities/breakpoint/cpp/TestCPPBreakpointLocations.py +++ b/lldb/test/API/functionalities/breakpoint/cpp/TestCPPBreakpointLocations.py @@ -2,9 +2,6 @@ Test lldb breakpoint ids. """ -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/functionalities/breakpoint/move_nearest/TestMoveNearest.py b/lldb/test/API/functionalities/breakpoint/move_nearest/TestMoveNearest.py --- a/lldb/test/API/functionalities/breakpoint/move_nearest/TestMoveNearest.py +++ b/lldb/test/API/functionalities/breakpoint/move_nearest/TestMoveNearest.py @@ -1,6 +1,3 @@ -from __future__ import print_function - - import lldb from lldbsuite.test.lldbtest import * import lldbsuite.test.lldbutil as lldbutil diff --git a/lldb/test/API/functionalities/conditional_break/TestConditionalBreak.py b/lldb/test/API/functionalities/conditional_break/TestConditionalBreak.py --- a/lldb/test/API/functionalities/conditional_break/TestConditionalBreak.py +++ b/lldb/test/API/functionalities/conditional_break/TestConditionalBreak.py @@ -2,9 +2,6 @@ Test conditionally break on a function and inspect its variables. 
""" -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-objc/TestDataFormatterObjCNSNumber.py b/lldb/test/API/functionalities/data-formatter/data-formatter-objc/TestDataFormatterObjCNSNumber.py --- a/lldb/test/API/functionalities/data-formatter/data-formatter-objc/TestDataFormatterObjCNSNumber.py +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-objc/TestDataFormatterObjCNSNumber.py @@ -3,8 +3,6 @@ Test lldb data formatter subsystem. """ -from __future__ import print_function - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-python-synth/TestDataFormatterPythonSynth.py b/lldb/test/API/functionalities/data-formatter/data-formatter-python-synth/TestDataFormatterPythonSynth.py --- a/lldb/test/API/functionalities/data-formatter/data-formatter-python-synth/TestDataFormatterPythonSynth.py +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-python-synth/TestDataFormatterPythonSynth.py @@ -2,9 +2,6 @@ Test lldb data formatter subsystem. """ -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/atomic/TestLibCxxAtomic.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/atomic/TestLibCxxAtomic.py --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/atomic/TestLibCxxAtomic.py +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/atomic/TestLibCxxAtomic.py @@ -2,9 +2,6 @@ Test lldb data formatter subsystem. 
""" -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-synthval/TestDataFormatterSynthVal.py b/lldb/test/API/functionalities/data-formatter/data-formatter-synthval/TestDataFormatterSynthVal.py --- a/lldb/test/API/functionalities/data-formatter/data-formatter-synthval/TestDataFormatterSynthVal.py +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-synthval/TestDataFormatterSynthVal.py @@ -2,9 +2,6 @@ Test lldb data formatter subsystem. """ -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/functionalities/data-formatter/dump_dynamic/TestDumpDynamic.py b/lldb/test/API/functionalities/data-formatter/dump_dynamic/TestDumpDynamic.py --- a/lldb/test/API/functionalities/data-formatter/dump_dynamic/TestDumpDynamic.py +++ b/lldb/test/API/functionalities/data-formatter/dump_dynamic/TestDumpDynamic.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - from lldbsuite.test import lldbinline lldbinline.MakeInlineTest( diff --git a/lldb/test/API/functionalities/data-formatter/vector-types/TestVectorTypesFormatting.py b/lldb/test/API/functionalities/data-formatter/vector-types/TestVectorTypesFormatting.py --- a/lldb/test/API/functionalities/data-formatter/vector-types/TestVectorTypesFormatting.py +++ b/lldb/test/API/functionalities/data-formatter/vector-types/TestVectorTypesFormatting.py @@ -2,9 +2,6 @@ Check that vector types format properly """ -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/functionalities/exec/TestExec.py b/lldb/test/API/functionalities/exec/TestExec.py --- a/lldb/test/API/functionalities/exec/TestExec.py +++ b/lldb/test/API/functionalities/exec/TestExec.py @@ -1,8 +1,6 
@@ """ Test some lldb command abbreviations. """ -from __future__ import print_function - import lldb from lldbsuite.test.decorators import * diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestAArch64XMLRegOffsets.py b/lldb/test/API/functionalities/gdb_remote_client/TestAArch64XMLRegOffsets.py --- a/lldb/test/API/functionalities/gdb_remote_client/TestAArch64XMLRegOffsets.py +++ b/lldb/test/API/functionalities/gdb_remote_client/TestAArch64XMLRegOffsets.py @@ -1,4 +1,3 @@ -from __future__ import print_function from textwrap import dedent import lldb from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestArmRegisterDefinition.py b/lldb/test/API/functionalities/gdb_remote_client/TestArmRegisterDefinition.py --- a/lldb/test/API/functionalities/gdb_remote_client/TestArmRegisterDefinition.py +++ b/lldb/test/API/functionalities/gdb_remote_client/TestArmRegisterDefinition.py @@ -1,4 +1,3 @@ -from __future__ import print_function import lldb from lldbsuite.test.lldbtest import * from lldbsuite.test.decorators import * diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestFork.py b/lldb/test/API/functionalities/gdb_remote_client/TestFork.py --- a/lldb/test/API/functionalities/gdb_remote_client/TestFork.py +++ b/lldb/test/API/functionalities/gdb_remote_client/TestFork.py @@ -1,4 +1,3 @@ -from __future__ import print_function import lldb import unittest from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestGDBServerNoTargetXML.py b/lldb/test/API/functionalities/gdb_remote_client/TestGDBServerNoTargetXML.py --- a/lldb/test/API/functionalities/gdb_remote_client/TestGDBServerNoTargetXML.py +++ b/lldb/test/API/functionalities/gdb_remote_client/TestGDBServerNoTargetXML.py @@ -1,4 +1,3 @@ -from __future__ import print_function import lldb from lldbsuite.test.lldbtest import * from lldbsuite.test.decorators import * diff --git 
a/lldb/test/API/functionalities/gdb_remote_client/TestGDBServerTargetXML.py b/lldb/test/API/functionalities/gdb_remote_client/TestGDBServerTargetXML.py --- a/lldb/test/API/functionalities/gdb_remote_client/TestGDBServerTargetXML.py +++ b/lldb/test/API/functionalities/gdb_remote_client/TestGDBServerTargetXML.py @@ -1,4 +1,3 @@ -from __future__ import print_function import lldb from lldbsuite.test.lldbtest import * from lldbsuite.test.decorators import * diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestJLink6Armv7RegisterDefinition.py b/lldb/test/API/functionalities/gdb_remote_client/TestJLink6Armv7RegisterDefinition.py --- a/lldb/test/API/functionalities/gdb_remote_client/TestJLink6Armv7RegisterDefinition.py +++ b/lldb/test/API/functionalities/gdb_remote_client/TestJLink6Armv7RegisterDefinition.py @@ -1,4 +1,3 @@ -from __future__ import print_function import lldb from lldbsuite.test.lldbtest import * from lldbsuite.test.decorators import * diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestMultiprocess.py b/lldb/test/API/functionalities/gdb_remote_client/TestMultiprocess.py --- a/lldb/test/API/functionalities/gdb_remote_client/TestMultiprocess.py +++ b/lldb/test/API/functionalities/gdb_remote_client/TestMultiprocess.py @@ -1,4 +1,3 @@ -from __future__ import print_function import lldb import unittest from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestNestedRegDefinitions.py b/lldb/test/API/functionalities/gdb_remote_client/TestNestedRegDefinitions.py --- a/lldb/test/API/functionalities/gdb_remote_client/TestNestedRegDefinitions.py +++ b/lldb/test/API/functionalities/gdb_remote_client/TestNestedRegDefinitions.py @@ -1,4 +1,3 @@ -from __future__ import print_function import lldb from lldbsuite.test.lldbtest import * from lldbsuite.test.decorators import * diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestNoGPacketSupported.py 
b/lldb/test/API/functionalities/gdb_remote_client/TestNoGPacketSupported.py --- a/lldb/test/API/functionalities/gdb_remote_client/TestNoGPacketSupported.py +++ b/lldb/test/API/functionalities/gdb_remote_client/TestNoGPacketSupported.py @@ -1,4 +1,3 @@ -from __future__ import print_function import lldb from lldbsuite.test.lldbtest import * from lldbsuite.test.decorators import * diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestNoWatchpointSupportInfo.py b/lldb/test/API/functionalities/gdb_remote_client/TestNoWatchpointSupportInfo.py --- a/lldb/test/API/functionalities/gdb_remote_client/TestNoWatchpointSupportInfo.py +++ b/lldb/test/API/functionalities/gdb_remote_client/TestNoWatchpointSupportInfo.py @@ -1,4 +1,3 @@ -from __future__ import print_function import lldb from lldbsuite.test.lldbtest import * from lldbsuite.test.decorators import * diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestPartialGPacket.py b/lldb/test/API/functionalities/gdb_remote_client/TestPartialGPacket.py --- a/lldb/test/API/functionalities/gdb_remote_client/TestPartialGPacket.py +++ b/lldb/test/API/functionalities/gdb_remote_client/TestPartialGPacket.py @@ -1,4 +1,3 @@ -from __future__ import print_function import lldb from lldbsuite.test.lldbtest import * from lldbsuite.test.decorators import * diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestRegDefinitionInParts.py b/lldb/test/API/functionalities/gdb_remote_client/TestRegDefinitionInParts.py --- a/lldb/test/API/functionalities/gdb_remote_client/TestRegDefinitionInParts.py +++ b/lldb/test/API/functionalities/gdb_remote_client/TestRegDefinitionInParts.py @@ -1,4 +1,3 @@ -from __future__ import print_function import lldb import time from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestRemoteRegNums.py b/lldb/test/API/functionalities/gdb_remote_client/TestRemoteRegNums.py --- a/lldb/test/API/functionalities/gdb_remote_client/TestRemoteRegNums.py 
+++ b/lldb/test/API/functionalities/gdb_remote_client/TestRemoteRegNums.py @@ -1,4 +1,3 @@ -from __future__ import print_function import lldb from lldbsuite.test.lldbtest import * from lldbsuite.test.decorators import * diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestRestartBug.py b/lldb/test/API/functionalities/gdb_remote_client/TestRestartBug.py --- a/lldb/test/API/functionalities/gdb_remote_client/TestRestartBug.py +++ b/lldb/test/API/functionalities/gdb_remote_client/TestRestartBug.py @@ -1,4 +1,3 @@ -from __future__ import print_function import lldb from lldbsuite.test.lldbtest import * from lldbsuite.test.decorators import * diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestTargetXMLArch.py b/lldb/test/API/functionalities/gdb_remote_client/TestTargetXMLArch.py --- a/lldb/test/API/functionalities/gdb_remote_client/TestTargetXMLArch.py +++ b/lldb/test/API/functionalities/gdb_remote_client/TestTargetXMLArch.py @@ -1,4 +1,3 @@ -from __future__ import print_function import lldb from lldbsuite.test.lldbtest import * from lldbsuite.test.decorators import * diff --git a/lldb/test/API/functionalities/inferior-assert/TestInferiorAssert.py b/lldb/test/API/functionalities/inferior-assert/TestInferiorAssert.py --- a/lldb/test/API/functionalities/inferior-assert/TestInferiorAssert.py +++ b/lldb/test/API/functionalities/inferior-assert/TestInferiorAssert.py @@ -1,8 +1,5 @@ """Test that lldb functions correctly after the inferior has asserted.""" -from __future__ import print_function - - import lldb from lldbsuite.test import lldbutil from lldbsuite.test import lldbplatformutil diff --git a/lldb/test/API/functionalities/load_unload/TestLoadUnload.py b/lldb/test/API/functionalities/load_unload/TestLoadUnload.py --- a/lldb/test/API/functionalities/load_unload/TestLoadUnload.py +++ b/lldb/test/API/functionalities/load_unload/TestLoadUnload.py @@ -2,9 +2,6 @@ Test that breakpoint by symbol name works correctly with dynamic libs. 
""" -from __future__ import print_function - - import os import re import lldb diff --git a/lldb/test/API/functionalities/multidebugger_commands/TestMultipleDebuggersCommands.py b/lldb/test/API/functionalities/multidebugger_commands/TestMultipleDebuggersCommands.py --- a/lldb/test/API/functionalities/multidebugger_commands/TestMultipleDebuggersCommands.py +++ b/lldb/test/API/functionalities/multidebugger_commands/TestMultipleDebuggersCommands.py @@ -2,9 +2,6 @@ Test that commands do not try and hold on to stale CommandInterpreters in a multiple debuggers scenario """ -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/functionalities/plugins/command_plugin/TestPluginCommands.py b/lldb/test/API/functionalities/plugins/command_plugin/TestPluginCommands.py --- a/lldb/test/API/functionalities/plugins/command_plugin/TestPluginCommands.py +++ b/lldb/test/API/functionalities/plugins/command_plugin/TestPluginCommands.py @@ -2,9 +2,6 @@ Test that plugins that load commands work correctly. """ -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/functionalities/plugins/python_os_plugin/stepping_plugin_threads/TestOSPluginStepping.py b/lldb/test/API/functionalities/plugins/python_os_plugin/stepping_plugin_threads/TestOSPluginStepping.py --- a/lldb/test/API/functionalities/plugins/python_os_plugin/stepping_plugin_threads/TestOSPluginStepping.py +++ b/lldb/test/API/functionalities/plugins/python_os_plugin/stepping_plugin_threads/TestOSPluginStepping.py @@ -3,9 +3,6 @@ all threads at every stop. 
""" -from __future__ import print_function - - import os import lldb from lldbsuite.test.decorators import * diff --git a/lldb/test/API/functionalities/postmortem/elf-core/TestLinuxCore.py b/lldb/test/API/functionalities/postmortem/elf-core/TestLinuxCore.py --- a/lldb/test/API/functionalities/postmortem/elf-core/TestLinuxCore.py +++ b/lldb/test/API/functionalities/postmortem/elf-core/TestLinuxCore.py @@ -2,8 +2,6 @@ Test basics of linux core file debugging. """ -from __future__ import division, print_function - import shutil import struct import os diff --git a/lldb/test/API/functionalities/postmortem/netbsd-core/TestNetBSDCore.py b/lldb/test/API/functionalities/postmortem/netbsd-core/TestNetBSDCore.py --- a/lldb/test/API/functionalities/postmortem/netbsd-core/TestNetBSDCore.py +++ b/lldb/test/API/functionalities/postmortem/netbsd-core/TestNetBSDCore.py @@ -2,8 +2,6 @@ Test NetBSD core file debugging. """ -from __future__ import division, print_function - import signal import os diff --git a/lldb/test/API/functionalities/recursion/TestValueObjectRecursion.py b/lldb/test/API/functionalities/recursion/TestValueObjectRecursion.py --- a/lldb/test/API/functionalities/recursion/TestValueObjectRecursion.py +++ b/lldb/test/API/functionalities/recursion/TestValueObjectRecursion.py @@ -2,9 +2,6 @@ Test lldb data formatter subsystem. """ -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/functionalities/target-new-solib-notifications/TestModuleLoadedNotifys.py b/lldb/test/API/functionalities/target-new-solib-notifications/TestModuleLoadedNotifys.py --- a/lldb/test/API/functionalities/target-new-solib-notifications/TestModuleLoadedNotifys.py +++ b/lldb/test/API/functionalities/target-new-solib-notifications/TestModuleLoadedNotifys.py @@ -3,9 +3,6 @@ they should be delivered in batches instead of one-by-one. 
""" -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/functionalities/tty/TestTerminal.py b/lldb/test/API/functionalities/tty/TestTerminal.py --- a/lldb/test/API/functionalities/tty/TestTerminal.py +++ b/lldb/test/API/functionalities/tty/TestTerminal.py @@ -2,9 +2,6 @@ Test lldb command aliases. """ -from __future__ import print_function - - import unittest2 import os import lldb diff --git a/lldb/test/API/functionalities/unwind/noreturn/TestNoreturnUnwind.py b/lldb/test/API/functionalities/unwind/noreturn/TestNoreturnUnwind.py --- a/lldb/test/API/functionalities/unwind/noreturn/TestNoreturnUnwind.py +++ b/lldb/test/API/functionalities/unwind/noreturn/TestNoreturnUnwind.py @@ -2,9 +2,6 @@ Test that we can backtrace correctly with 'noreturn' functions on the stack """ -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/functionalities/unwind/sigtramp/TestSigtrampUnwind.py b/lldb/test/API/functionalities/unwind/sigtramp/TestSigtrampUnwind.py --- a/lldb/test/API/functionalities/unwind/sigtramp/TestSigtrampUnwind.py +++ b/lldb/test/API/functionalities/unwind/sigtramp/TestSigtrampUnwind.py @@ -2,9 +2,6 @@ Test that we can backtrace correctly with 'sigtramp' functions on the stack """ -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/functionalities/unwind/zeroth_frame/TestZerothFrame.py b/lldb/test/API/functionalities/unwind/zeroth_frame/TestZerothFrame.py --- a/lldb/test/API/functionalities/unwind/zeroth_frame/TestZerothFrame.py +++ b/lldb/test/API/functionalities/unwind/zeroth_frame/TestZerothFrame.py @@ -21,9 +21,6 @@ when using API directly, for example in LLDB-MI. 
""" -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/lang/c/step_over_no_deadlock/TestStepOverDoesntBlock.py b/lldb/test/API/lang/c/step_over_no_deadlock/TestStepOverDoesntBlock.py --- a/lldb/test/API/lang/c/step_over_no_deadlock/TestStepOverDoesntBlock.py +++ b/lldb/test/API/lang/c/step_over_no_deadlock/TestStepOverDoesntBlock.py @@ -2,9 +2,6 @@ Test that step over will let other threads run when necessary """ -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/lang/cpp/class-template-parameter-pack/TestTemplatePackArgs.py b/lldb/test/API/lang/cpp/class-template-parameter-pack/TestTemplatePackArgs.py new file mode 100644 --- /dev/null +++ b/lldb/test/API/lang/cpp/class-template-parameter-pack/TestTemplatePackArgs.py @@ -0,0 +1,38 @@ +""" +Test that the type of arguments to C++ template classes that have variadic +parameters can be enumerated. 
+""" +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class TemplatePackArgsTestCase(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + def test_template_argument_pack(self): + self.build() + (_, _, thread, _) = lldbutil.run_to_source_breakpoint(self, + 'breakpoint here', lldb.SBFileSpec('main.cpp'), exe_name = 'a.out') + frame = thread.GetSelectedFrame() + + empty_pack = frame.FindVariable('emptyPack') + self.assertTrue(empty_pack.IsValid(), + 'make sure we find the emptyPack variable') + + only_pack = frame.FindVariable('onlyPack') + self.assertTrue(only_pack.IsValid(), + 'make sure we find the onlyPack variable') + self.assertEqual(only_pack.GetType().GetNumberOfTemplateArguments(), 4) + self.assertEqual(only_pack.GetType().GetTemplateArgumentType(0).GetName(), 'int') + self.assertEqual(only_pack.GetType().GetTemplateArgumentType(1).GetName(), 'char') + self.assertEqual(only_pack.GetType().GetTemplateArgumentType(2).GetName(), 'double') + # Access the C template parameter. 
+ nested_template = only_pack.GetType().GetTemplateArgumentType(3) + self.assertEqual(nested_template.GetName(), 'D') + self.assertEqual(nested_template.GetNumberOfTemplateArguments(), 3) + self.assertEqual(nested_template.GetTemplateArgumentType(0).GetName(), 'int') + self.assertEqual(nested_template.GetTemplateArgumentType(1).GetName(), 'int') + self.assertEqual(nested_template.GetTemplateArgumentType(2).GetName(), 'bool') diff --git a/lldb/test/API/lang/cpp/class-template-parameter-pack/main.cpp b/lldb/test/API/lang/cpp/class-template-parameter-pack/main.cpp --- a/lldb/test/API/lang/cpp/class-template-parameter-pack/main.cpp +++ b/lldb/test/API/lang/cpp/class-template-parameter-pack/main.cpp @@ -26,7 +26,13 @@ bool argsAre_Int_bool() { return true; } }; +template struct OnlyPack {}; +template struct EmptyPack {}; + int main(int argc, char const *argv[]) { + EmptyPack emptyPack; + OnlyPack> onlyPack; + C myC; C myLesserC; myC.member = 64; @@ -34,7 +40,7 @@ (void)C().argsAre_16_32(); (void)(myC.member != 64); D myD; - D myLesserD; + D myLesserD; // breakpoint here myD.member = 64; (void)D().argsAre_Int_bool(); (void)D().argsAre_Int_bool(); diff --git a/lldb/test/API/lang/cpp/class_types/TestClassTypesDisassembly.py b/lldb/test/API/lang/cpp/class_types/TestClassTypesDisassembly.py --- a/lldb/test/API/lang/cpp/class_types/TestClassTypesDisassembly.py +++ b/lldb/test/API/lang/cpp/class_types/TestClassTypesDisassembly.py @@ -2,9 +2,6 @@ Test the lldb disassemble command on each call frame when stopped on C's ctor. """ -from __future__ import print_function - - import os import lldb from lldbsuite.test.decorators import * diff --git a/lldb/test/API/lang/cpp/dynamic-value/TestCppValueCast.py b/lldb/test/API/lang/cpp/dynamic-value/TestCppValueCast.py --- a/lldb/test/API/lang/cpp/dynamic-value/TestCppValueCast.py +++ b/lldb/test/API/lang/cpp/dynamic-value/TestCppValueCast.py @@ -2,9 +2,6 @@ Test lldb Python API SBValue::Cast(SBType) for C++ types. 
""" -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/lang/cpp/stl/TestStdCXXDisassembly.py b/lldb/test/API/lang/cpp/stl/TestStdCXXDisassembly.py --- a/lldb/test/API/lang/cpp/stl/TestStdCXXDisassembly.py +++ b/lldb/test/API/lang/cpp/stl/TestStdCXXDisassembly.py @@ -2,9 +2,6 @@ Test the lldb disassemble command on lib stdc++. """ -from __future__ import print_function - - import os import lldb from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/lang/objc/direct-dispatch-step/TestObjCDirectDispatchStepping.py b/lldb/test/API/lang/objc/direct-dispatch-step/TestObjCDirectDispatchStepping.py --- a/lldb/test/API/lang/objc/direct-dispatch-step/TestObjCDirectDispatchStepping.py +++ b/lldb/test/API/lang/objc/direct-dispatch-step/TestObjCDirectDispatchStepping.py @@ -1,8 +1,5 @@ """Test stepping through ObjC method dispatch in various forms.""" -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/lang/objc/foundation/TestObjCMethods.py b/lldb/test/API/lang/objc/foundation/TestObjCMethods.py --- a/lldb/test/API/lang/objc/foundation/TestObjCMethods.py +++ b/lldb/test/API/lang/objc/foundation/TestObjCMethods.py @@ -3,9 +3,6 @@ Also lookup objective-c data types and evaluate expressions. """ -from __future__ import print_function - - import os import os.path import lldb diff --git a/lldb/test/API/lang/objc/foundation/TestObjectDescriptionAPI.py b/lldb/test/API/lang/objc/foundation/TestObjectDescriptionAPI.py --- a/lldb/test/API/lang/objc/foundation/TestObjectDescriptionAPI.py +++ b/lldb/test/API/lang/objc/foundation/TestObjectDescriptionAPI.py @@ -2,9 +2,6 @@ Test SBValue.GetObjectDescription() with the value from SBTarget.FindGlobalVariables(). 
""" -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/lang/objc/objc-class-method/TestObjCClassMethod.py b/lldb/test/API/lang/objc/objc-class-method/TestObjCClassMethod.py --- a/lldb/test/API/lang/objc/objc-class-method/TestObjCClassMethod.py +++ b/lldb/test/API/lang/objc/objc-class-method/TestObjCClassMethod.py @@ -1,8 +1,5 @@ """Test calling functions in class methods.""" -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/lang/objc/objc-stepping/TestObjCStepping.py b/lldb/test/API/lang/objc/objc-stepping/TestObjCStepping.py --- a/lldb/test/API/lang/objc/objc-stepping/TestObjCStepping.py +++ b/lldb/test/API/lang/objc/objc-stepping/TestObjCStepping.py @@ -1,8 +1,5 @@ """Test stepping through ObjC method dispatch in various forms.""" -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/lang/objc/print-obj/TestPrintObj.py b/lldb/test/API/lang/objc/print-obj/TestPrintObj.py --- a/lldb/test/API/lang/objc/print-obj/TestPrintObj.py +++ b/lldb/test/API/lang/objc/print-obj/TestPrintObj.py @@ -2,9 +2,6 @@ Test "print object" where another thread blocks the print object from making progress. 
""" -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/lldbtest.py b/lldb/test/API/lldbtest.py --- a/lldb/test/API/lldbtest.py +++ b/lldb/test/API/lldbtest.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import import os import re import operator diff --git a/lldb/test/API/macosx/queues/TestQueues.py b/lldb/test/API/macosx/queues/TestQueues.py --- a/lldb/test/API/macosx/queues/TestQueues.py +++ b/lldb/test/API/macosx/queues/TestQueues.py @@ -1,8 +1,5 @@ """Test queues inspection SB APIs.""" -from __future__ import print_function - - import os import lldb from lldbsuite.test.decorators import * diff --git a/lldb/test/API/python_api/default-constructor/TestDefaultConstructorForAPIObjects.py b/lldb/test/API/python_api/default-constructor/TestDefaultConstructorForAPIObjects.py --- a/lldb/test/API/python_api/default-constructor/TestDefaultConstructorForAPIObjects.py +++ b/lldb/test/API/python_api/default-constructor/TestDefaultConstructorForAPIObjects.py @@ -11,9 +11,6 @@ after default construction. 
""" -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/python_api/disassemble-raw-data/TestDisassembleRawData.py b/lldb/test/API/python_api/disassemble-raw-data/TestDisassembleRawData.py --- a/lldb/test/API/python_api/disassemble-raw-data/TestDisassembleRawData.py +++ b/lldb/test/API/python_api/disassemble-raw-data/TestDisassembleRawData.py @@ -2,9 +2,6 @@ Use lldb Python API to disassemble raw machine code bytes """ -from __future__ import print_function - - import re import lldb from lldbsuite.test.decorators import * diff --git a/lldb/test/API/python_api/disassemble-raw-data/TestDisassemble_VST1_64.py b/lldb/test/API/python_api/disassemble-raw-data/TestDisassemble_VST1_64.py --- a/lldb/test/API/python_api/disassemble-raw-data/TestDisassemble_VST1_64.py +++ b/lldb/test/API/python_api/disassemble-raw-data/TestDisassemble_VST1_64.py @@ -2,8 +2,6 @@ Use lldb Python API to disassemble raw machine code bytes """ -from __future__ import print_function - from io import StringIO import sys diff --git a/lldb/test/API/python_api/event/TestEvents.py b/lldb/test/API/python_api/event/TestEvents.py --- a/lldb/test/API/python_api/event/TestEvents.py +++ b/lldb/test/API/python_api/event/TestEvents.py @@ -2,9 +2,6 @@ Test lldb Python event APIs. 
""" -from __future__ import print_function - - import re import lldb from lldbsuite.test.decorators import * diff --git a/lldb/test/API/python_api/formatters/TestFormattersSBAPI.py b/lldb/test/API/python_api/formatters/TestFormattersSBAPI.py --- a/lldb/test/API/python_api/formatters/TestFormattersSBAPI.py +++ b/lldb/test/API/python_api/formatters/TestFormattersSBAPI.py @@ -1,8 +1,5 @@ """Test Python APIs for working with formatters""" -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/python_api/frame/TestFrames.py b/lldb/test/API/python_api/frame/TestFrames.py --- a/lldb/test/API/python_api/frame/TestFrames.py +++ b/lldb/test/API/python_api/frame/TestFrames.py @@ -3,8 +3,6 @@ And other SBFrame API tests. """ -from __future__ import print_function - import io import lldb diff --git a/lldb/test/API/python_api/frame/get-variables/TestGetVariables.py b/lldb/test/API/python_api/frame/get-variables/TestGetVariables.py --- a/lldb/test/API/python_api/frame/get-variables/TestGetVariables.py +++ b/lldb/test/API/python_api/frame/get-variables/TestGetVariables.py @@ -2,9 +2,6 @@ Test that SBFrame::GetVariables() calls work correctly. """ -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/python_api/frame/inlines/TestInlinedFrame.py b/lldb/test/API/python_api/frame/inlines/TestInlinedFrame.py --- a/lldb/test/API/python_api/frame/inlines/TestInlinedFrame.py +++ b/lldb/test/API/python_api/frame/inlines/TestInlinedFrame.py @@ -2,9 +2,6 @@ Testlldb Python SBFrame APIs IsInlined() and GetFunctionName(). 
""" -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/python_api/function_symbol/TestDisasmAPI.py b/lldb/test/API/python_api/function_symbol/TestDisasmAPI.py --- a/lldb/test/API/python_api/function_symbol/TestDisasmAPI.py +++ b/lldb/test/API/python_api/function_symbol/TestDisasmAPI.py @@ -2,9 +2,6 @@ Test retrieval of SBAddress from function/symbol, disassembly, and SBAddress APIs. """ -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/python_api/function_symbol/TestSymbolAPI.py b/lldb/test/API/python_api/function_symbol/TestSymbolAPI.py --- a/lldb/test/API/python_api/function_symbol/TestSymbolAPI.py +++ b/lldb/test/API/python_api/function_symbol/TestSymbolAPI.py @@ -2,9 +2,6 @@ Test newly added SBSymbol and SBAddress APIs. """ -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py b/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py --- a/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py +++ b/lldb/test/API/python_api/interpreter/TestCommandInterpreterAPI.py @@ -1,8 +1,5 @@ """Test the SBCommandInterpreter APIs.""" -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/python_api/lldbutil/frame/TestFrameUtils.py b/lldb/test/API/python_api/lldbutil/frame/TestFrameUtils.py --- a/lldb/test/API/python_api/lldbutil/frame/TestFrameUtils.py +++ b/lldb/test/API/python_api/lldbutil/frame/TestFrameUtils.py @@ -2,9 +2,6 @@ Test utility functions for the frame object. 
""" -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/python_api/lldbutil/iter/TestLLDBIterator.py b/lldb/test/API/python_api/lldbutil/iter/TestLLDBIterator.py --- a/lldb/test/API/python_api/lldbutil/iter/TestLLDBIterator.py +++ b/lldb/test/API/python_api/lldbutil/iter/TestLLDBIterator.py @@ -2,9 +2,6 @@ Test the iteration protocol for some lldb container objects. """ -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/python_api/lldbutil/iter/TestRegistersIterator.py b/lldb/test/API/python_api/lldbutil/iter/TestRegistersIterator.py --- a/lldb/test/API/python_api/lldbutil/iter/TestRegistersIterator.py +++ b/lldb/test/API/python_api/lldbutil/iter/TestRegistersIterator.py @@ -2,9 +2,6 @@ Test the iteration protocol for frame registers. """ -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/python_api/module_section/TestModuleAndSection.py b/lldb/test/API/python_api/module_section/TestModuleAndSection.py --- a/lldb/test/API/python_api/module_section/TestModuleAndSection.py +++ b/lldb/test/API/python_api/module_section/TestModuleAndSection.py @@ -2,9 +2,6 @@ Test some SBModule and SBSection APIs. """ -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/python_api/process/TestProcessAPI.py b/lldb/test/API/python_api/process/TestProcessAPI.py --- a/lldb/test/API/python_api/process/TestProcessAPI.py +++ b/lldb/test/API/python_api/process/TestProcessAPI.py @@ -2,9 +2,6 @@ Test SBProcess APIs, including ReadMemory(), WriteMemory(), and others. 
""" -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/python_api/process/io/TestProcessIO.py b/lldb/test/API/python_api/process/io/TestProcessIO.py --- a/lldb/test/API/python_api/process/io/TestProcessIO.py +++ b/lldb/test/API/python_api/process/io/TestProcessIO.py @@ -1,8 +1,5 @@ """Test Python APIs for process IO.""" -from __future__ import print_function - - import os import lldb from lldbsuite.test.decorators import * diff --git a/lldb/test/API/python_api/symbol-context/TestSymbolContext.py b/lldb/test/API/python_api/symbol-context/TestSymbolContext.py --- a/lldb/test/API/python_api/symbol-context/TestSymbolContext.py +++ b/lldb/test/API/python_api/symbol-context/TestSymbolContext.py @@ -2,8 +2,6 @@ Test SBSymbolContext APIs. """ -from __future__ import print_function - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/python_api/target/TestTargetAPI.py b/lldb/test/API/python_api/target/TestTargetAPI.py --- a/lldb/test/API/python_api/target/TestTargetAPI.py +++ b/lldb/test/API/python_api/target/TestTargetAPI.py @@ -2,9 +2,6 @@ Test SBTarget APIs. """ -from __future__ import print_function - - import unittest2 import os import lldb diff --git a/lldb/test/API/python_api/thread/TestThreadAPI.py b/lldb/test/API/python_api/thread/TestThreadAPI.py --- a/lldb/test/API/python_api/thread/TestThreadAPI.py +++ b/lldb/test/API/python_api/thread/TestThreadAPI.py @@ -2,9 +2,6 @@ Test SBThread APIs. """ -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/python_api/type/TestTypeList.py b/lldb/test/API/python_api/type/TestTypeList.py --- a/lldb/test/API/python_api/type/TestTypeList.py +++ b/lldb/test/API/python_api/type/TestTypeList.py @@ -2,10 +2,6 @@ Test SBType and SBTypeList API. 
""" -from __future__ import print_function - - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/python_api/value/TestValueAPI.py b/lldb/test/API/python_api/value/TestValueAPI.py --- a/lldb/test/API/python_api/value/TestValueAPI.py +++ b/lldb/test/API/python_api/value/TestValueAPI.py @@ -2,9 +2,6 @@ Test some SBValue APIs. """ -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/python_api/value/linked_list/TestValueAPILinkedList.py b/lldb/test/API/python_api/value/linked_list/TestValueAPILinkedList.py --- a/lldb/test/API/python_api/value/linked_list/TestValueAPILinkedList.py +++ b/lldb/test/API/python_api/value/linked_list/TestValueAPILinkedList.py @@ -3,9 +3,6 @@ supports iteration till the end of list is reached. """ -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/python_api/watchpoint/TestSetWatchpoint.py b/lldb/test/API/python_api/watchpoint/TestSetWatchpoint.py --- a/lldb/test/API/python_api/watchpoint/TestSetWatchpoint.py +++ b/lldb/test/API/python_api/watchpoint/TestSetWatchpoint.py @@ -2,9 +2,6 @@ Use lldb Python SBValue API to create a watchpoint for read_write of 'globl' var. """ -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/python_api/watchpoint/TestWatchpointIgnoreCount.py b/lldb/test/API/python_api/watchpoint/TestWatchpointIgnoreCount.py --- a/lldb/test/API/python_api/watchpoint/TestWatchpointIgnoreCount.py +++ b/lldb/test/API/python_api/watchpoint/TestWatchpointIgnoreCount.py @@ -2,9 +2,6 @@ Use lldb Python SBWatchpoint API to set the ignore count. 
""" -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/python_api/watchpoint/TestWatchpointIter.py b/lldb/test/API/python_api/watchpoint/TestWatchpointIter.py --- a/lldb/test/API/python_api/watchpoint/TestWatchpointIter.py +++ b/lldb/test/API/python_api/watchpoint/TestWatchpointIter.py @@ -2,10 +2,6 @@ Use lldb Python SBTarget API to iterate on the watchpoint(s) for the target. """ -from __future__ import print_function - - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/python_api/watchpoint/condition/TestWatchpointConditionAPI.py b/lldb/test/API/python_api/watchpoint/condition/TestWatchpointConditionAPI.py --- a/lldb/test/API/python_api/watchpoint/condition/TestWatchpointConditionAPI.py +++ b/lldb/test/API/python_api/watchpoint/condition/TestWatchpointConditionAPI.py @@ -2,9 +2,6 @@ Test watchpoint condition API. """ -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/python_api/watchpoint/watchlocation/TestSetWatchlocation.py b/lldb/test/API/python_api/watchpoint/watchlocation/TestSetWatchlocation.py --- a/lldb/test/API/python_api/watchpoint/watchlocation/TestSetWatchlocation.py +++ b/lldb/test/API/python_api/watchpoint/watchlocation/TestSetWatchlocation.py @@ -2,10 +2,6 @@ Use lldb Python SBValue.WatchPointee() API to create a watchpoint for write of '*g_char_ptr'. 
""" -from __future__ import print_function - - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/python_api/watchpoint/watchlocation/TestTargetWatchAddress.py b/lldb/test/API/python_api/watchpoint/watchlocation/TestTargetWatchAddress.py --- a/lldb/test/API/python_api/watchpoint/watchlocation/TestTargetWatchAddress.py +++ b/lldb/test/API/python_api/watchpoint/watchlocation/TestTargetWatchAddress.py @@ -2,9 +2,6 @@ Use lldb Python SBtarget.WatchAddress() API to create a watchpoint for write of '*g_char_ptr'. """ -from __future__ import print_function - - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/qemu/TestQemuAPI.py b/lldb/test/API/qemu/TestQemuAPI.py --- a/lldb/test/API/qemu/TestQemuAPI.py +++ b/lldb/test/API/qemu/TestQemuAPI.py @@ -1,4 +1,3 @@ -from __future__ import print_function import lldb import os from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/qemu/TestQemuLaunch.py b/lldb/test/API/qemu/TestQemuLaunch.py --- a/lldb/test/API/qemu/TestQemuLaunch.py +++ b/lldb/test/API/qemu/TestQemuLaunch.py @@ -1,4 +1,3 @@ -from __future__ import print_function import lldb import unittest import os diff --git a/lldb/test/API/sample_test/TestSampleInlineTest.py b/lldb/test/API/sample_test/TestSampleInlineTest.py --- a/lldb/test/API/sample_test/TestSampleInlineTest.py +++ b/lldb/test/API/sample_test/TestSampleInlineTest.py @@ -2,8 +2,6 @@ Describe the purpose of the test here. """ -from __future__ import absolute_import - from lldbsuite.test import lldbinline lldbinline.MakeInlineTest( diff --git a/lldb/test/API/source-manager/TestSourceManager.py b/lldb/test/API/source-manager/TestSourceManager.py --- a/lldb/test/API/source-manager/TestSourceManager.py +++ b/lldb/test/API/source-manager/TestSourceManager.py @@ -9,8 +9,6 @@ Test the caching mechanism of the source manager. 
""" -from __future__ import print_function - import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/terminal/TestSTTYBeforeAndAfter.py b/lldb/test/API/terminal/TestSTTYBeforeAndAfter.py --- a/lldb/test/API/terminal/TestSTTYBeforeAndAfter.py +++ b/lldb/test/API/terminal/TestSTTYBeforeAndAfter.py @@ -2,9 +2,6 @@ Test that 'stty -a' displays the same output before and after running the lldb command. """ -from __future__ import print_function - - import lldb import io import sys diff --git a/lldb/test/API/test_runner/test/inferior.py b/lldb/test/API/test_runner/test/inferior.py --- a/lldb/test/API/test_runner/test/inferior.py +++ b/lldb/test/API/test_runner/test/inferior.py @@ -1,8 +1,6 @@ #!/usr/bin/env python """Inferior program used by process control tests.""" -from __future__ import print_function - import argparse import datetime import signal diff --git a/lldb/test/API/tools/lldb-server/TestGdbRemoteHostInfo.py b/lldb/test/API/tools/lldb-server/TestGdbRemoteHostInfo.py --- a/lldb/test/API/tools/lldb-server/TestGdbRemoteHostInfo.py +++ b/lldb/test/API/tools/lldb-server/TestGdbRemoteHostInfo.py @@ -1,5 +1,3 @@ -from __future__ import print_function - # lldb test suite imports from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import TestBase diff --git a/lldb/test/API/tools/lldb-server/TestGdbRemotePlatformFile.py b/lldb/test/API/tools/lldb-server/TestGdbRemotePlatformFile.py --- a/lldb/test/API/tools/lldb-server/TestGdbRemotePlatformFile.py +++ b/lldb/test/API/tools/lldb-server/TestGdbRemotePlatformFile.py @@ -1,5 +1,3 @@ -from __future__ import print_function - # lldb test suite imports from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import TestBase diff --git a/lldb/test/API/tools/lldb-server/commandline/TestGdbRemoteConnection.py b/lldb/test/API/tools/lldb-server/commandline/TestGdbRemoteConnection.py --- 
a/lldb/test/API/tools/lldb-server/commandline/TestGdbRemoteConnection.py +++ b/lldb/test/API/tools/lldb-server/commandline/TestGdbRemoteConnection.py @@ -1,5 +1,3 @@ -from __future__ import print_function - import gdbremote_testcase import random import select diff --git a/lldb/test/API/tools/lldb-vscode/console/TestVSCode_console.py b/lldb/test/API/tools/lldb-vscode/console/TestVSCode_console.py --- a/lldb/test/API/tools/lldb-vscode/console/TestVSCode_console.py +++ b/lldb/test/API/tools/lldb-vscode/console/TestVSCode_console.py @@ -2,8 +2,6 @@ Test lldb-vscode setBreakpoints request """ -from __future__ import print_function - import vscode from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/tools/lldb-vscode/correct-thread/TestVSCode_correct_thread.py b/lldb/test/API/tools/lldb-vscode/correct-thread/TestVSCode_correct_thread.py --- a/lldb/test/API/tools/lldb-vscode/correct-thread/TestVSCode_correct_thread.py +++ b/lldb/test/API/tools/lldb-vscode/correct-thread/TestVSCode_correct_thread.py @@ -2,8 +2,6 @@ Test lldb-vscode setBreakpoints request """ -from __future__ import print_function - import vscode from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/tools/lldb-vscode/module/TestVSCode_module.py b/lldb/test/API/tools/lldb-vscode/module/TestVSCode_module.py --- a/lldb/test/API/tools/lldb-vscode/module/TestVSCode_module.py +++ b/lldb/test/API/tools/lldb-vscode/module/TestVSCode_module.py @@ -2,8 +2,6 @@ Test lldb-vscode setBreakpoints request """ -from __future__ import print_function - import vscode from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/tools/lldb-vscode/optimized/TestVSCode_optimized.py b/lldb/test/API/tools/lldb-vscode/optimized/TestVSCode_optimized.py --- a/lldb/test/API/tools/lldb-vscode/optimized/TestVSCode_optimized.py +++ 
b/lldb/test/API/tools/lldb-vscode/optimized/TestVSCode_optimized.py @@ -2,8 +2,6 @@ Test lldb-vscode variables/stackTrace request for optimized code """ -from __future__ import print_function - import vscode from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/tools/lldb-vscode/variables/TestVSCode_variables.py b/lldb/test/API/tools/lldb-vscode/variables/TestVSCode_variables.py --- a/lldb/test/API/tools/lldb-vscode/variables/TestVSCode_variables.py +++ b/lldb/test/API/tools/lldb-vscode/variables/TestVSCode_variables.py @@ -2,8 +2,6 @@ Test lldb-vscode setBreakpoints request """ -from __future__ import print_function - import vscode from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * diff --git a/lldb/test/API/types/AbstractBase.py b/lldb/test/API/types/AbstractBase.py --- a/lldb/test/API/types/AbstractBase.py +++ b/lldb/test/API/types/AbstractBase.py @@ -2,8 +2,6 @@ Abstract base class of basic types provides a generic type tester method. 
""" -from __future__ import print_function - import os import re import lldb diff --git a/lldb/test/Shell/Commands/CommandScriptImmediateOutput/Inputs/custom_command.py b/lldb/test/Shell/Commands/CommandScriptImmediateOutput/Inputs/custom_command.py --- a/lldb/test/Shell/Commands/CommandScriptImmediateOutput/Inputs/custom_command.py +++ b/lldb/test/Shell/Commands/CommandScriptImmediateOutput/Inputs/custom_command.py @@ -1,5 +1,3 @@ -from __future__ import print_function - import sys diff --git a/lldb/test/Shell/helper/build.py b/lldb/test/Shell/helper/build.py --- a/lldb/test/Shell/helper/build.py +++ b/lldb/test/Shell/helper/build.py @@ -1,7 +1,5 @@ #!/usr/bin/env python -from __future__ import print_function - import argparse import os import shutil diff --git a/lldb/tools/lldb-server/CMakeLists.txt b/lldb/tools/lldb-server/CMakeLists.txt --- a/lldb/tools/lldb-server/CMakeLists.txt +++ b/lldb/tools/lldb-server/CMakeLists.txt @@ -53,6 +53,7 @@ lldbPluginInstructionARM lldbPluginInstructionMIPS lldbPluginInstructionMIPS64 + lldbPluginInstructionRISCV ${LLDB_SYSTEM_LIBS} LINK_COMPONENTS diff --git a/lldb/tools/lldb-server/SystemInitializerLLGS.cpp b/lldb/tools/lldb-server/SystemInitializerLLGS.cpp --- a/lldb/tools/lldb-server/SystemInitializerLLGS.cpp +++ b/lldb/tools/lldb-server/SystemInitializerLLGS.cpp @@ -41,6 +41,11 @@ #include "Plugins/Instruction/MIPS/EmulateInstructionMIPS.h" #endif +#if defined(__riscv) +#define LLDB_TARGET_RISCV +#include "Plugins/Instruction/RISCV/EmulateInstructionRISCV.h" +#endif + using namespace lldb_private; llvm::Error SystemInitializerLLGS::Initialize() { @@ -58,6 +63,9 @@ #if defined(LLDB_TARGET_MIPS64) EmulateInstructionMIPS64::Initialize(); #endif +#if defined(LLDB_TARGET_RISCV) + EmulateInstructionRISCV::Initialize(); +#endif return llvm::Error::success(); } @@ -74,6 +82,9 @@ #if defined(LLDB_TARGET_MIPS64) EmulateInstructionMIPS64::Terminate(); #endif +#if defined(LLDB_TARGET_RISCV) + EmulateInstructionRISCV::Terminate(); 
+#endif SystemInitializerCommon::Terminate(); } diff --git a/lldb/unittests/Instruction/TestAArch64Emulator.cpp b/lldb/unittests/Instruction/ARM64/TestAArch64Emulator.cpp rename from lldb/unittests/Instruction/TestAArch64Emulator.cpp rename to lldb/unittests/Instruction/ARM64/TestAArch64Emulator.cpp diff --git a/lldb/unittests/Instruction/CMakeLists.txt b/lldb/unittests/Instruction/CMakeLists.txt --- a/lldb/unittests/Instruction/CMakeLists.txt +++ b/lldb/unittests/Instruction/CMakeLists.txt @@ -1,12 +1,29 @@ -if("ARM" IN_LIST LLVM_TARGETS_TO_BUILD) +set(FILES "") +set(DEPS "") + +if ("ARM" IN_LIST LLVM_TARGETS_TO_BUILD) + list(APPEND FILES ARM64/TestAArch64Emulator.cpp) + list(APPEND DEPS lldbPluginInstructionARM64) +endif () + +if ("RISCV" IN_LIST LLVM_TARGETS_TO_BUILD) + list(APPEND FILES RISCV/TestRISCVEmulator.cpp) + list(APPEND DEPS lldbPluginInstructionRISCV) +endif () + +list(LENGTH FILES LISTLEN) + +if (LISTLEN GREATER 0) add_lldb_unittest(EmulatorTests - TestAArch64Emulator.cpp + ${FILES} + LINK_LIBS lldbCore lldbSymbol lldbTarget - lldbPluginInstructionARM64 + ${DEPS} LINK_COMPONENTS Support - ${LLVM_TARGETS_TO_BUILD}) -endif() + ${LLVM_TARGETS_TO_BUILD} + ) +endif () diff --git a/lldb/unittests/Instruction/RISCV/TestRISCVEmulator.cpp b/lldb/unittests/Instruction/RISCV/TestRISCVEmulator.cpp new file mode 100644 --- /dev/null +++ b/lldb/unittests/Instruction/RISCV/TestRISCVEmulator.cpp @@ -0,0 +1,196 @@ +//===-- TestRISCVEmulator.cpp ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "gtest/gtest.h" + +#include "lldb/Core/Address.h" +#include "lldb/Core/Disassembler.h" +#include "lldb/Core/PluginManager.h" +#include "lldb/Target/ExecutionContext.h" +#include "lldb/Utility/ArchSpec.h" +#include "lldb/Utility/RegisterValue.h" + +#include "Plugins/Instruction/RISCV/EmulateInstructionRISCV.h" +#include "Plugins/Process/Utility/RegisterInfoPOSIX_riscv64.h" +#include "Plugins/Process/Utility/lldb-riscv-register-enums.h" + +using namespace lldb; +using namespace lldb_private; + +struct RISCVEmulatorTester : public EmulateInstructionRISCV, testing::Test { + RegisterInfoPOSIX_riscv64::GPR gpr; + + RISCVEmulatorTester() + : EmulateInstructionRISCV(ArchSpec("riscv64-unknown-linux-gnu")) { + EmulateInstruction::SetReadRegCallback(ReadRegisterCallback); + EmulateInstruction::SetWriteRegCallback(WriteRegisterCallback); + } + + static bool ReadRegisterCallback(EmulateInstruction *instruction, void *baton, + const RegisterInfo *reg_info, + RegisterValue ®_value) { + RISCVEmulatorTester *tester = (RISCVEmulatorTester *)instruction; + uint32_t reg = reg_info->kinds[eRegisterKindLLDB]; + if (reg == gpr_x0_riscv) + reg_value.SetUInt(0, reg_info->byte_size); + else + reg_value.SetUInt(tester->gpr.gpr[reg], reg_info->byte_size); + return true; + } + + static bool WriteRegisterCallback(EmulateInstruction *instruction, + void *baton, const Context &context, + const RegisterInfo *reg_info, + const RegisterValue ®_value) { + RISCVEmulatorTester *tester = (RISCVEmulatorTester *)instruction; + uint32_t reg = reg_info->kinds[eRegisterKindLLDB]; + if (reg != gpr_x0_riscv) + tester->gpr.gpr[reg] = reg_value.GetAsUInt64(); + return true; + } +}; + +TEST_F(RISCVEmulatorTester, testJAL) { + lldb::addr_t old_pc = 0x114514; + WritePC(old_pc); + // jal x1, -6*4 + uint32_t inst = 0b11111110100111111111000011101111; + 
ASSERT_TRUE(DecodeAndExecute(inst, false)); + auto x1 = gpr.gpr[1]; + + bool success = false; + auto pc = ReadPC(&success); + + ASSERT_TRUE(success); + ASSERT_EQ(x1, old_pc + 4); + ASSERT_EQ(pc, old_pc + (-6 * 4)); +} + +constexpr uint32_t EncodeIType(uint32_t opcode, uint32_t funct3, uint32_t rd, + uint32_t rs1, uint32_t imm) { + return imm << 20 | rs1 << 15 | funct3 << 12 | rd << 7 | opcode; +} + +constexpr uint32_t JALR(uint32_t rd, uint32_t rs1, int32_t offset) { + return EncodeIType(0b1100111, 0, rd, rs1, uint32_t(offset)); +} + +TEST_F(RISCVEmulatorTester, testJALR) { + lldb::addr_t old_pc = 0x114514; + lldb::addr_t old_x2 = 0x1024; + WritePC(old_pc); + gpr.gpr[2] = old_x2; + // jalr x1, x2(-255) + uint32_t inst = JALR(1, 2, -255); + ASSERT_TRUE(DecodeAndExecute(inst, false)); + auto x1 = gpr.gpr[1]; + + bool success = false; + auto pc = ReadPC(&success); + + ASSERT_TRUE(success); + ASSERT_EQ(x1, old_pc + 4); + // JALR always zeros the bottom bit of the target address. + ASSERT_EQ(pc, (old_x2 + (-255)) & (~1)); +} + +constexpr uint32_t EncodeBType(uint32_t opcode, uint32_t funct3, uint32_t rs1, + uint32_t rs2, uint32_t imm) { + uint32_t bimm = (imm & (0b1 << 11)) >> 4 | (imm & (0b11110)) << 7 | + (imm & (0b111111 << 5)) << 20 | (imm & (0b1 << 12)) << 19; + + return rs2 << 20 | rs1 << 15 | funct3 << 12 | opcode | bimm; +} + +constexpr uint32_t BEQ(uint32_t rs1, uint32_t rs2, int32_t offset) { + return EncodeBType(0b1100011, 0b000, rs1, rs2, uint32_t(offset)); +} + +constexpr uint32_t BNE(uint32_t rs1, uint32_t rs2, int32_t offset) { + return EncodeBType(0b1100011, 0b001, rs1, rs2, uint32_t(offset)); +} + +constexpr uint32_t BLT(uint32_t rs1, uint32_t rs2, int32_t offset) { + return EncodeBType(0b1100011, 0b100, rs1, rs2, uint32_t(offset)); +} + +constexpr uint32_t BGE(uint32_t rs1, uint32_t rs2, int32_t offset) { + return EncodeBType(0b1100011, 0b101, rs1, rs2, uint32_t(offset)); +} + +constexpr uint32_t BLTU(uint32_t rs1, uint32_t rs2, int32_t offset) { + 
return EncodeBType(0b1100011, 0b110, rs1, rs2, uint32_t(offset)); +} + +constexpr uint32_t BGEU(uint32_t rs1, uint32_t rs2, int32_t offset) { + return EncodeBType(0b1100011, 0b111, rs1, rs2, uint32_t(offset)); +} + +using EncoderB = uint32_t (*)(uint32_t rs1, uint32_t rs2, int32_t offset); + +void testBranch(RISCVEmulatorTester *tester, EncoderB encoder, bool branched, + uint64_t rs1, uint64_t rs2) { + // prepare test registers + lldb::addr_t old_pc = 0x114514; + tester->WritePC(old_pc); + tester->gpr.gpr[1] = rs1; + tester->gpr.gpr[2] = rs2; + // b x1, x2, (-256) + uint32_t inst = encoder(1, 2, -256); + ASSERT_TRUE(tester->DecodeAndExecute(inst, false)); + bool success = false; + auto pc = tester->ReadPC(&success); + ASSERT_TRUE(success); + ASSERT_EQ(pc, old_pc + (branched ? (-256) : 0)); +} + +#define GEN_BRANCH_TEST(name, rs1, rs2_branched, rs2_continued) \ + TEST_F(RISCVEmulatorTester, test##name##Branched) { \ + testBranch(this, name, true, rs1, rs2_branched); \ + } \ + TEST_F(RISCVEmulatorTester, test##name##Continued) { \ + testBranch(this, name, false, rs1, rs2_continued); \ + } + +// GEN_BRANCH_TEST(opcode, imm1, imm2, imm3): +// It should branch for instruction `opcode imm1, imm2` +// It should do nothing for instruction `opcode imm1, imm3` +GEN_BRANCH_TEST(BEQ, 1, 1, 0) +GEN_BRANCH_TEST(BNE, 1, 0, 1) +GEN_BRANCH_TEST(BLT, -2, 1, -3) +GEN_BRANCH_TEST(BGE, -2, -3, 1) +GEN_BRANCH_TEST(BLTU, -2, -1, 1) +GEN_BRANCH_TEST(BGEU, -2, 1, -1) + +void testNothing(RISCVEmulatorTester *tester, uint32_t inst) { + lldb::addr_t old_pc = 0x114514; + tester->WritePC(old_pc); + tester->SetInstruction(Opcode(inst, tester->GetByteOrder()), + LLDB_INVALID_ADDRESS, nullptr); + ASSERT_TRUE(tester->EvaluateInstruction(0)); + bool success = false; + auto pc = tester->ReadPC(&success); + ASSERT_TRUE(success); + ASSERT_EQ(pc, old_pc); + ASSERT_TRUE( + tester->EvaluateInstruction(eEmulateInstructionOptionAutoAdvancePC)); + pc = tester->ReadPC(&success); + ASSERT_TRUE(success); + 
ASSERT_EQ(pc, old_pc + 4); +} + +#define GEN_NOTHING_TEST(name, inst) \ + TEST_F(RISCVEmulatorTester, testDoNothing_##name) { testNothing(this, inst); } + +// GEN_NOTHING_TEST(name, inst): +// It should do nothing (except increasing pc) for instruction `inst` +GEN_NOTHING_TEST(mv, 0x01813083) // mv a0, a5 +GEN_NOTHING_TEST(li, 0x00078513) // li a5, 0 +GEN_NOTHING_TEST(sd, 0x02010413) // sd s0, sp(16) +GEN_NOTHING_TEST(lw, 0x0007879b) // lw a5, s0(-20) +GEN_NOTHING_TEST(addi, 0x00113423) // addi sp, sp, -16 diff --git a/lldb/unittests/Symbol/TestTypeSystemClang.cpp b/lldb/unittests/Symbol/TestTypeSystemClang.cpp --- a/lldb/unittests/Symbol/TestTypeSystemClang.cpp +++ b/lldb/unittests/Symbol/TestTypeSystemClang.cpp @@ -500,18 +500,24 @@ for (CompilerType t : {type, typedef_type, auto_type}) { SCOPED_TRACE(t.GetTypeName().AsCString()); - EXPECT_EQ(m_ast->GetTemplateArgumentKind(t.GetOpaqueQualType(), 0), - eTemplateArgumentKindType); - EXPECT_EQ(m_ast->GetTypeTemplateArgument(t.GetOpaqueQualType(), 0), - int_type); - EXPECT_EQ(llvm::None, - m_ast->GetIntegralTemplateArgument(t.GetOpaqueQualType(), 0)); - - EXPECT_EQ(m_ast->GetTemplateArgumentKind(t.GetOpaqueQualType(), 1), - eTemplateArgumentKindIntegral); - EXPECT_EQ(m_ast->GetTypeTemplateArgument(t.GetOpaqueQualType(), 1), - CompilerType()); - auto result = m_ast->GetIntegralTemplateArgument(t.GetOpaqueQualType(), 1); + const bool expand_pack = false; + EXPECT_EQ( + m_ast->GetTemplateArgumentKind(t.GetOpaqueQualType(), 0, expand_pack), + eTemplateArgumentKindType); + EXPECT_EQ( + m_ast->GetTypeTemplateArgument(t.GetOpaqueQualType(), 0, expand_pack), + int_type); + EXPECT_EQ(llvm::None, m_ast->GetIntegralTemplateArgument( + t.GetOpaqueQualType(), 0, expand_pack)); + + EXPECT_EQ( + m_ast->GetTemplateArgumentKind(t.GetOpaqueQualType(), 1, expand_pack), + eTemplateArgumentKindIntegral); + EXPECT_EQ( + m_ast->GetTypeTemplateArgument(t.GetOpaqueQualType(), 1, expand_pack), + CompilerType()); + auto result = 
m_ast->GetIntegralTemplateArgument(t.GetOpaqueQualType(), 1, + expand_pack); ASSERT_NE(llvm::None, result); EXPECT_EQ(arg, result->value); EXPECT_EQ(int_type, result->type); diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -339,6 +339,12 @@ set(LLVM_LIBDIR_SUFFIX "" CACHE STRING "Define suffix of library directory name (32/64)" ) +# LLVM_INSTALL_PACKAGE_DIR needs to be declared prior to adding the tools +# subdirectory in order to have the value available for llvm-config. +include(GNUInstallPackageDir) +set(LLVM_INSTALL_PACKAGE_DIR "${CMAKE_INSTALL_PACKAGEDIR}/llvm" CACHE STRING + "Path for CMake subdirectory for LLVM (defaults to '${CMAKE_INSTALL_PACKAGEDIR}/llvm')") + set(LLVM_TOOLS_INSTALL_DIR "${CMAKE_INSTALL_BINDIR}" CACHE STRING "Path for binary subdirectory (defaults to '${CMAKE_INSTALL_BINDIR}')") mark_as_advanced(LLVM_TOOLS_INSTALL_DIR) @@ -1141,6 +1147,9 @@ add_subdirectory(utils/UnicodeData) add_subdirectory(utils/yaml-bench) add_subdirectory(utils/split-file) + if( LLVM_INCLUDE_TESTS ) + add_subdirectory(utils/unittest) + endif() else() if ( LLVM_INCLUDE_TESTS ) message(FATAL_ERROR "Including tests when not building utils will not work. @@ -1185,9 +1194,6 @@ add_subdirectory(utils/lit) add_subdirectory(test) add_subdirectory(unittests) - if( LLVM_INCLUDE_UTILS ) - add_subdirectory(utils/unittest) - endif() if (WIN32) # This utility is used to prevent crashing tests from calling Dr. 
Watson on diff --git a/llvm/cmake/modules/CMakeLists.txt b/llvm/cmake/modules/CMakeLists.txt --- a/llvm/cmake/modules/CMakeLists.txt +++ b/llvm/cmake/modules/CMakeLists.txt @@ -1,10 +1,7 @@ -include(GNUInstallPackageDir) include(ExtendPath) include(LLVMDistributionSupport) include(FindPrefixFromConfig) -set(LLVM_INSTALL_PACKAGE_DIR "${CMAKE_INSTALL_PACKAGEDIR}/llvm" CACHE STRING - "Path for CMake subdirectory for LLVM (defaults to '${CMAKE_INSTALL_PACKAGEDIR}/llvm')") # CMAKE_INSTALL_PACKAGEDIR might be absolute, so don't reuse below. set(llvm_cmake_builddir "${LLVM_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}/cmake/llvm") diff --git a/llvm/cmake/modules/FindGRPC.cmake b/llvm/cmake/modules/FindGRPC.cmake --- a/llvm/cmake/modules/FindGRPC.cmake +++ b/llvm/cmake/modules/FindGRPC.cmake @@ -1,4 +1,4 @@ -option(ENABLE_GRPC_REFLECTION "Link clangd-index-server to gRPC Reflection library" OFF) +option(ENABLE_GRPC_REFLECTION "Link to gRPC Reflection library" OFF) # FIXME(kirillbobyrev): Check if gRPC and Protobuf headers can be included at # configure time. @@ -42,7 +42,7 @@ find_program(GRPC_CPP_PLUGIN grpc_cpp_plugin) find_program(PROTOC protoc) if (NOT GRPC_CPP_PLUGIN OR NOT PROTOC) - message(FATAL_ERROR "gRPC C++ Plugin and Protoc must be on $PATH for Clangd remote index build.") + message(FATAL_ERROR "gRPC C++ Plugin and Protoc must be on $PATH for gRPC-enabled build.") endif() # On macOS the libraries are typically installed via Homebrew and are not on # the system path. 
@@ -132,7 +132,7 @@ ARGS ${Flags} "${ProtoSourceAbsolutePath}" DEPENDS "${ProtoSourceAbsolutePath}") - add_clang_library(${LibraryName} ${GeneratedProtoSource} + add_llvm_library(${LibraryName} ${GeneratedProtoSource} PARTIAL_SOURCES_INTENDED LINK_LIBS PUBLIC grpc++ protobuf) diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake --- a/llvm/cmake/modules/HandleLLVMOptions.cmake +++ b/llvm/cmake/modules/HandleLLVMOptions.cmake @@ -794,6 +794,9 @@ # Prevent bugs that can happen with llvm's brace style. add_flag_if_supported("-Wmisleading-indentation" MISLEADING_INDENTATION_FLAG) + + # Enable -Wctad-maybe-unsupported to catch unintended use of CTAD. + add_flag_if_supported("-Wctad-maybe-unsupported" CTAD_MAYBE_UNSPPORTED_FLAG) endif (LLVM_ENABLE_WARNINGS AND (LLVM_COMPILER_IS_GCC_COMPATIBLE OR CLANG_CL)) if (LLVM_COMPILER_IS_GCC_COMPATIBLE AND NOT LLVM_ENABLE_WARNINGS) diff --git a/llvm/cmake/modules/TableGen.cmake b/llvm/cmake/modules/TableGen.cmake --- a/llvm/cmake/modules/TableGen.cmake +++ b/llvm/cmake/modules/TableGen.cmake @@ -2,6 +2,7 @@ # while LLVM_TARGET_DEPENDS may contain additional file dependencies. # Extra parameters for `tblgen' may come after `ofn' parameter. # Adds the name of the generated file to TABLEGEN_OUTPUT. 
+include(LLVMDistributionSupport) function(tablegen project ofn) cmake_parse_arguments(ARG "" "" "DEPENDS;EXTRA_INCLUDES" ${ARGN}) @@ -140,7 +141,7 @@ endfunction() macro(add_tablegen target project) - cmake_parse_arguments(ADD_TABLEGEN "" "DESTINATION" "" ${ARGN}) + cmake_parse_arguments(ADD_TABLEGEN "" "DESTINATION;EXPORT" "" ${ARGN}) set(${target}_OLD_LLVM_LINK_COMPONENTS ${LLVM_LINK_COMPONENTS}) set(LLVM_LINK_COMPONENTS ${LLVM_LINK_COMPONENTS} TableGen) @@ -190,14 +191,12 @@ endif() if (ADD_TABLEGEN_DESTINATION AND NOT LLVM_INSTALL_TOOLCHAIN_ONLY AND LLVM_BUILD_UTILS) - set(export_to_llvmexports) - if(${target} IN_LIST LLVM_DISTRIBUTION_COMPONENTS OR - NOT LLVM_DISTRIBUTION_COMPONENTS) - set(export_to_llvmexports EXPORT LLVMExports) + set(export_arg) + if(ADD_TABLEGEN_EXPORT) + get_target_export_arg(${target} ${ADD_TABLEGEN_EXPORT} export_arg) endif() - install(TARGETS ${target} - ${export_to_llvmexports} + ${export_arg} COMPONENT ${target} RUNTIME DESTINATION "${ADD_TABLEGEN_DESTINATION}") if(NOT LLVM_ENABLE_IDE) @@ -206,5 +205,8 @@ COMPONENT ${target}) endif() endif() - set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS ${target}) + if(ADD_TABLEGEN_EXPORT) + string(TOUPPER ${ADD_TABLEGEN_EXPORT} export_upper) + set_property(GLOBAL APPEND PROPERTY ${export_upper}_EXPORTS ${target}) + endif() endmacro() diff --git a/llvm/docs/CommandGuide/llvm-objdump.rst b/llvm/docs/CommandGuide/llvm-objdump.rst --- a/llvm/docs/CommandGuide/llvm-objdump.rst +++ b/llvm/docs/CommandGuide/llvm-objdump.rst @@ -312,6 +312,10 @@ Disassemble just the specified symbol's instructions. +.. option:: --chained-fixups + + Print chained fixup information. + .. option:: --dyld_info Print bind and rebase information used by dyld to resolve external diff --git a/llvm/docs/CommandGuide/llvm-otool.rst b/llvm/docs/CommandGuide/llvm-otool.rst --- a/llvm/docs/CommandGuide/llvm-otool.rst +++ b/llvm/docs/CommandGuide/llvm-otool.rst @@ -23,6 +23,10 @@ Select slice of universal Mach-O file. +.. 
option:: -chained_fixups + + Print chained fixup information. + .. option:: -C Print linker optimization hints. diff --git a/llvm/include/llvm/ADT/Optional.h b/llvm/include/llvm/ADT/Optional.h --- a/llvm/include/llvm/ADT/Optional.h +++ b/llvm/include/llvm/ADT/Optional.h @@ -348,6 +348,7 @@ return None; } template + LLVM_DEPRECATED("Use transform instead.", "transform") auto map(const Function &F) const & -> Optional { if (*this) return F(value()); @@ -378,6 +379,7 @@ return None; } template + LLVM_DEPRECATED("Use transform instead.", "transform") auto map(const Function &F) && -> Optional { if (*this) diff --git a/llvm/include/llvm/ADT/SmallSet.h b/llvm/include/llvm/ADT/SmallSet.h --- a/llvm/include/llvm/ADT/SmallSet.h +++ b/llvm/include/llvm/ADT/SmallSet.h @@ -141,6 +141,7 @@ std::set Set; using VIterator = typename SmallVector::const_iterator; + using SIterator = typename std::set::const_iterator; using mutable_iterator = typename SmallVector::iterator; // In small mode SmallPtrSet uses linear search for the elements, so it is @@ -171,22 +172,21 @@ } /// insert - Insert an element into the set if it isn't already there. - /// Returns true if the element is inserted (it was not in the set before). - /// The first value of the returned pair is unused and provided for - /// partial compatibility with the standard library self-associative container - /// concept. - // FIXME: Add iterators that abstract over the small and large form, and then - // return those here. - std::pair insert(const T &V) { - if (!isSmall()) - return std::make_pair(None, Set.insert(V).second); + /// Returns a pair. The first value of it is an iterator to the inserted + /// element or the existing element in the set. The second value is true + /// if the element is inserted (it was not in the set before). 
+ std::pair<const_iterator, bool> insert(const T &V) { + if (!isSmall()) { + auto [I, Inserted] = Set.insert(V); + return std::make_pair(const_iterator(I), Inserted); + } VIterator I = vfind(V); if (I != Vector.end()) // Don't reinsert if it already exists. - return std::make_pair(None, false); + return std::make_pair(const_iterator(I), false); if (Vector.size() < N) { Vector.push_back(V); - return std::make_pair(None, true); + return std::make_pair(const_iterator(std::prev(Vector.end())), true); } // Otherwise, grow from vector to set. @@ -194,8 +194,7 @@ Set.insert(Vector.back()); Vector.pop_back(); } - Set.insert(V); - return std::make_pair(None, true); + return std::make_pair(const_iterator(Set.insert(V).first), true); } template diff --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h --- a/llvm/include/llvm/ADT/SmallVector.h +++ b/llvm/include/llvm/ADT/SmallVector.h @@ -92,8 +92,8 @@ template <class T> using SmallVectorSizeType = - typename std::conditional<sizeof(T) < 4 && sizeof(void *) >= 8, uint64_t, - uint32_t>::type; + std::conditional_t<sizeof(T) < 4 && sizeof(void *) >= 8, uint64_t, + uint32_t>; /// Figure out the offset of the first element. 
template struct SmallVectorAlignmentAndSize { diff --git a/llvm/include/llvm/Analysis/RegionInfoImpl.h b/llvm/include/llvm/Analysis/RegionInfoImpl.h --- a/llvm/include/llvm/Analysis/RegionInfoImpl.h +++ b/llvm/include/llvm/Analysis/RegionInfoImpl.h @@ -390,10 +390,10 @@ template void RegionBase::addSubRegion(RegionT *SubRegion, bool moveChildren) { assert(!SubRegion->parent && "SubRegion already has a parent!"); - assert(llvm::find_if(*this, + assert(llvm::none_of(*this, [&](const std::unique_ptr &R) { return R.get() == SubRegion; - }) == children.end() && + }) && "Subregion already exists!"); SubRegion->parent = static_cast(this); diff --git a/llvm/include/llvm/BinaryFormat/MachO.h b/llvm/include/llvm/BinaryFormat/MachO.h --- a/llvm/include/llvm/BinaryFormat/MachO.h +++ b/llvm/include/llvm/BinaryFormat/MachO.h @@ -1002,6 +1002,19 @@ uint64_t n_value; }; +// Values for dyld_chained_fixups_header::imports_format. +enum { + DYLD_CHAINED_IMPORT = 1, + DYLD_CHAINED_IMPORT_ADDEND = 2, + DYLD_CHAINED_IMPORT_ADDEND64 = 3, +}; + +// Values for dyld_chained_fixups_header::symbols_format. +enum { + DYLD_CHAINED_SYMBOL_UNCOMPRESSED = 0, + DYLD_CHAINED_SYMBOL_ZLIB = 1, +}; + /// Structs for dyld chained fixups. /// dyld_chained_fixups_header is the data pointed to by LC_DYLD_CHAINED_FIXUPS /// load command. diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -143,9 +143,13 @@ /// Indicate that this basic block is entered via an exception handler. bool IsEHPad = false; - /// Indicate that this basic block is potentially the target of an indirect - /// branch. - bool AddressTaken = false; + /// Indicate that this MachineBasicBlock is referenced somewhere other than + /// as predecessor/successor, a terminator MachineInstr, or a jump table. 
+ bool MachineBlockAddressTaken = false; + + /// If this MachineBasicBlock corresponds to an IR-level "blockaddress" + /// constant, this contains a pointer to that block. + BasicBlock *AddressTakenIRBlock = nullptr; /// Indicate that this basic block needs its symbol be emitted regardless of /// whether the flow just falls-through to it. @@ -216,12 +220,35 @@ /// Return a formatted string to identify this block and its parent function. std::string getFullName() const; - /// Test whether this block is potentially the target of an indirect branch. - bool hasAddressTaken() const { return AddressTaken; } + /// Test whether this block is used as something other than the target + /// of a terminator, exception-handling target, or jump table. This is + /// either the result of an IR-level "blockaddress", or some form + /// of target-specific branch lowering. + bool hasAddressTaken() const { + return MachineBlockAddressTaken || AddressTakenIRBlock; + } + + /// Test whether this block is used as something other than the target of a + /// terminator, exception-handling target, jump table, or IR blockaddress. + /// For example, its address might be loaded into a register, or + /// stored in some branch table that isn't part of MachineJumpTableInfo. + bool isMachineBlockAddressTaken() const { return MachineBlockAddressTaken; } + + /// Test whether this block is the target of an IR BlockAddress. (There can be + /// more than one MBB associated with an IR BB where the address is taken.) + bool isIRBlockAddressTaken() const { return AddressTakenIRBlock; } + + /// Retrieves the BasicBlock which corresponds to this MachineBasicBlock. + BasicBlock *getAddressTakenIRBlock() const { return AddressTakenIRBlock; } + + /// Set this block to indicate that its address is used as something other + /// than the target of a terminator, exception-handling target, jump table, + /// or IR-level "blockaddress". 
+ void setMachineBlockAddressTaken() { MachineBlockAddressTaken = true; } - /// Set this block to reflect that it potentially is the target of an indirect - /// branch. - void setHasAddressTaken() { AddressTaken = true; } + /// Set this block to reflect that it corresponds to an IR-level basic block + /// with a BlockAddress. + void setAddressTakenIRBlock(BasicBlock *BB) { AddressTakenIRBlock = BB; } /// Test whether this block must have its label emitted. bool hasLabelMustBeEmitted() const { return LabelMustBeEmitted; } diff --git a/llvm/include/llvm/CodeGen/RDFGraph.h b/llvm/include/llvm/CodeGen/RDFGraph.h --- a/llvm/include/llvm/CodeGen/RDFGraph.h +++ b/llvm/include/llvm/CodeGen/RDFGraph.h @@ -934,6 +934,8 @@ const DataFlowGraph &G; }; + template Print(const T &, const DataFlowGraph &) -> Print; + template struct PrintNode : Print> { PrintNode(const NodeAddr &x, const DataFlowGraph &g) diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_i386.h b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_i386.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_i386.h @@ -0,0 +1,39 @@ +//===--- ELF_i386.h - JIT link functions for ELF/i386 --*- C++ -*----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// +// +// jit-link functions for ELF/i386. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_JITLINK_ELF_I386_H +#define LLVM_EXECUTIONENGINE_JITLINK_ELF_I386_H + +#include "llvm/ExecutionEngine/JITLink/JITLink.h" + +namespace llvm { +namespace jitlink { + +/// Create a LinkGraph from an ELF/i386 relocatable object +/// +/// Note: The graph does not take ownership of the underlying buffer, nor copy +/// its contents. The caller is responsible for ensuring that the object buffer +/// outlives the graph. +Expected> +createLinkGraphFromELFObject_i386(MemoryBufferRef ObjectBuffer); + +/// jit-link the given object buffer, which must be a ELF i386 relocatable +/// object file. +void link_ELF_i386(std::unique_ptr G, + std::unique_ptr Ctx); + +} // end namespace jitlink +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_JITLINK_ELF_I386_H diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h --- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h @@ -1004,10 +1004,10 @@ /// Create a section with the given name, protection flags, and alignment. Section &createSection(StringRef Name, MemProt Prot) { - assert(llvm::find_if(Sections, + assert(llvm::none_of(Sections, [&](std::unique_ptr
&Sec) { return Sec->getName() == Name; - }) == Sections.end() && + }) && "Duplicate section name"); std::unique_ptr
Sec(new Section(Name, Prot, Sections.size())); Sections.push_back(std::move(Sec)); @@ -1349,9 +1349,8 @@ assert(ExternalSymbols.count(&Sym) && "Symbol is not in the externals set"); ExternalSymbols.erase(&Sym); Addressable &Base = *Sym.Base; - assert(llvm::find_if(ExternalSymbols, - [&](Symbol *AS) { return AS->Base == &Base; }) == - ExternalSymbols.end() && + assert(llvm::none_of(ExternalSymbols, + [&](Symbol *AS) { return AS->Base == &Base; }) && "Base addressable still in use"); destroySymbol(Sym); destroyAddressable(Base); @@ -1365,9 +1364,8 @@ "Symbol is not in the absolute symbols set"); AbsoluteSymbols.erase(&Sym); Addressable &Base = *Sym.Base; - assert(llvm::find_if(ExternalSymbols, - [&](Symbol *AS) { return AS->Base == &Base; }) == - ExternalSymbols.end() && + assert(llvm::none_of(ExternalSymbols, + [&](Symbol *AS) { return AS->Base == &Base; }) && "Base addressable still in use"); destroySymbol(Sym); destroyAddressable(Base); diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/i386.h b/llvm/include/llvm/ExecutionEngine/JITLink/i386.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/JITLink/i386.h @@ -0,0 +1,38 @@ +//=== i386.h - Generic JITLink i386 edge kinds, utilities -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Generic utilities for graphs representing i386 objects. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_JITLINK_I386_H +#define LLVM_EXECUTIONENGINE_JITLINK_I386_H + +#include "llvm/ExecutionEngine/JITLink/JITLink.h" + +namespace llvm { +namespace jitlink { +namespace i386 { + +/// Represents i386 fixups +enum EdgeKind_i386 : Edge::Kind { + + /// None + None = Edge::FirstRelocation, + +}; + +/// Returns a string name for the given i386 edge. For debugging purposes +/// only. +const char *getEdgeKindName(Edge::Kind K); + +} // namespace i386 +} // namespace jitlink +} // namespace llvm + +#endif // LLVM_EXECUTIONENGINE_JITLINK_I386_H \ No newline at end of file diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h --- a/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h @@ -18,6 +18,7 @@ #include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/ExecutionEngine/Orc/Core.h" #include "llvm/ExecutionEngine/Orc/Mangling.h" +#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" #include "llvm/ExecutionEngine/Orc/Shared/OrcError.h" #include "llvm/ExecutionEngine/RuntimeDyld.h" #include "llvm/Object/Archive.h" @@ -314,6 +315,40 @@ DenseMap ObjectFilesMap; }; +/// A utility class to create COFF dllimport GOT symbols (__imp_*) and PLT +/// stubs. +/// +/// If an instance of this class is attached to a JITDylib as a fallback +/// definition generator, PLT stubs and dllimport __imp_ symbols will be +/// generated for external symbols found outside the given jitdylib. Currently +/// only supports x86_64 architecture. +class DLLImportDefinitionGenerator : public DefinitionGenerator { +public: + /// Creates a DLLImportDefinitionGenerator instance. 
+ static std::unique_ptr<DLLImportDefinitionGenerator> + Create(ExecutionSession &ES, ObjectLinkingLayer &L); + + Error tryToGenerate(LookupState &LS, LookupKind K, JITDylib &JD, + JITDylibLookupFlags JDLookupFlags, + const SymbolLookupSet &Symbols) override; + +private: + DLLImportDefinitionGenerator(ExecutionSession &ES, ObjectLinkingLayer &L) + : ES(ES), L(L) {} + + static Expected<unsigned> getTargetPointerSize(const Triple &TT); + static Expected<support::endianness> getTargetEndianness(const Triple &TT); + Expected<std::unique_ptr<jitlink::LinkGraph>> + createStubsGraph(const SymbolMap &Resolved); + + static StringRef getImpPrefix() { return "__imp_"; } + + static StringRef getSectionName() { return "$__DLLIMPORT_STUBS"; } + + ExecutionSession &ES; + ObjectLinkingLayer &L; +}; + } // end namespace orc } // end namespace llvm diff --git a/llvm/include/llvm/IR/CFG.h b/llvm/include/llvm/IR/CFG.h --- a/llvm/include/llvm/IR/CFG.h +++ b/llvm/include/llvm/IR/CFG.h @@ -47,7 +47,7 @@ using pointer = Ptr *; using reference = Ptr *; -private: +protected: using Self = PredIterator; USE_iterator It; diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -84,6 +84,11 @@ int ArgNo = idx.Value; } +// NonNull - The specified argument is not null. +class NonNull<AttrIndex idx> : IntrinsicProperty { + int ArgNo = idx.Value; +} + class Align<AttrIndex idx, int align> : IntrinsicProperty { int ArgNo = idx.Value; int Align = align; @@ -1407,7 +1412,8 @@ // Intrinsic to wrap a thread local variable. 
def int_threadlocal_address : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + [NonNull<RetIndex>, NonNull<ArgIndex<0>>, + IntrNoMem, IntrSpeculatable, IntrWillReturn]>; def int_experimental_stepvector : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [], [IntrNoMem]>; diff --git a/llvm/include/llvm/MC/MCDwarf.h b/llvm/include/llvm/MC/MCDwarf.h --- a/llvm/include/llvm/MC/MCDwarf.h +++ b/llvm/include/llvm/MC/MCDwarf.h @@ -387,6 +387,7 @@ bool hasRootFile() const { return !Header.RootFile.Name.empty(); } + MCDwarfFile &getRootFile() { return Header.RootFile; } const MCDwarfFile &getRootFile() const { return Header.RootFile; } // Report whether MD5 usage has been consistent (all-or-none). diff --git a/llvm/include/llvm/Support/ARMAttributeParser.h b/llvm/include/llvm/Support/ARMAttributeParser.h --- a/llvm/include/llvm/Support/ARMAttributeParser.h +++ b/llvm/include/llvm/Support/ARMAttributeParser.h @@ -70,6 +70,7 @@ Error PACRET_use(ARMBuildAttrs::AttrType tag); Error BTI_use(ARMBuildAttrs::AttrType tag); Error nodefaults(ARMBuildAttrs::AttrType tag); + Error also_compatible_with(ARMBuildAttrs::AttrType tag); public: ARMAttributeParser(ScopedPrinter *sw) diff --git a/llvm/include/llvm/Support/ARMBuildAttributes.h b/llvm/include/llvm/Support/ARMBuildAttributes.h --- a/llvm/include/llvm/Support/ARMBuildAttributes.h +++ b/llvm/include/llvm/Support/ARMBuildAttributes.h @@ -263,8 +263,6 @@ PACRETUsed = 1 }; -std::string encodeAttrTagValuePair(StringRef OriginalString); - } // namespace ARMBuildAttrs } // namespace llvm diff --git a/llvm/include/llvm/Support/GenericDomTreeConstruction.h b/llvm/include/llvm/Support/GenericDomTreeConstruction.h --- a/llvm/include/llvm/Support/GenericDomTreeConstruction.h +++ b/llvm/include/llvm/Support/GenericDomTreeConstruction.h @@ -712,7 +712,7 @@ assert(IsPostDom && "This function is only for postdominators"); // The tree has only trivial roots -- nothing to update. 
- if (std::none_of(DT.Roots.begin(), DT.Roots.end(), [BUI](const NodePtr N) { + if (llvm::none_of(DT.Roots, [BUI](const NodePtr N) { return HasForwardSuccessors(N, BUI); })) return; diff --git a/llvm/include/llvm/Support/ScopedPrinter.h b/llvm/include/llvm/Support/ScopedPrinter.h --- a/llvm/include/llvm/Support/ScopedPrinter.h +++ b/llvm/include/llvm/Support/ScopedPrinter.h @@ -344,6 +344,10 @@ startLine() << Label << ": " << Value << "\n"; } + void printStringEscaped(StringRef Label, StringRef Value) { + printStringEscapedImpl(Label, Value); + } + void printBinary(StringRef Label, StringRef Str, ArrayRef Value) { printBinaryImpl(Label, Str, Value, false); } @@ -478,6 +482,12 @@ startLine() << Label << ": " << Str << " (" << Value << ")\n"; } + virtual void printStringEscapedImpl(StringRef Label, StringRef Value) { + startLine() << Label << ": "; + OS.write_escaped(Value); + OS << '\n'; + } + void scopedBegin(char Symbol) { startLine() << Symbol << '\n'; indent(); diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h --- a/llvm/include/llvm/Transforms/IPO.h +++ b/llvm/include/llvm/Transforms/IPO.h @@ -240,10 +240,6 @@ /// devirtualization and control-flow integrity. ModulePass *createGlobalSplitPass(); -/// Write ThinLTO-ready bitcode to Str. 
-ModulePass *createWriteThinLTOBitcodePass(raw_ostream &Str, - raw_ostream *ThinLinkOS = nullptr); - } // End llvm namespace #endif diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h --- a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h +++ b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h @@ -161,6 +161,7 @@ Value *optimizeStrNDup(CallInst *CI, IRBuilderBase &B); Value *optimizeStrCpy(CallInst *CI, IRBuilderBase &B); Value *optimizeStpCpy(CallInst *CI, IRBuilderBase &B); + Value *optimizeStrLCpy(CallInst *CI, IRBuilderBase &B); Value *optimizeStrNCpy(CallInst *CI, IRBuilderBase &B); Value *optimizeStrLen(CallInst *CI, IRBuilderBase &B); Value *optimizeStrNLen(CallInst *CI, IRBuilderBase &B); @@ -222,6 +223,8 @@ Value *optimizePuts(CallInst *CI, IRBuilderBase &B); // Helper methods + Value* emitSnPrintfMemCpy(CallInst *CI, Value *StrArg, StringRef Str, + uint64_t N, IRBuilderBase &B); Value *emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, IRBuilderBase &B); void classifyArgUse(Value *Val, Function *F, bool IsFloat, diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -2868,6 +2868,11 @@ if (isImpliedCondition(LHS, RHS, Q.DL).value_or(false)) return getTrue(ITy); break; + case ICmpInst::ICMP_SLE: + /// SLE follows the same logic as SGE with the LHS and RHS swapped. 
+ if (isImpliedCondition(RHS, LHS, Q.DL).value_or(false)) + return getTrue(ITy); + break; } return nullptr; diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -1575,9 +1575,45 @@ RecQ.CxtI = P->getIncomingBlock(u)->getTerminator(); Known2 = KnownBits(BitWidth); + // Recurse, but cap the recursion to one level, because we don't // want to waste time spinning around in loops. computeKnownBits(IncValue, Known2, MaxAnalysisRecursionDepth - 1, RecQ); + + // If this failed, see if we can use a conditional branch into the phi + // to help us determine the range of the value. + if (Known2.isUnknown()) { + ICmpInst::Predicate Pred; + const APInt *RHSC; + BasicBlock *TrueSucc, *FalseSucc; + // TODO: Use RHS Value and compute range from its known bits. + if (match(RecQ.CxtI, + m_Br(m_c_ICmp(Pred, m_Specific(IncValue), m_APInt(RHSC)), + m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) { + // Check for cases of duplicate successors. + if ((TrueSucc == P->getParent()) != (FalseSucc == P->getParent())) { + // If we're using the false successor, invert the predicate. + if (FalseSucc == P->getParent()) + Pred = CmpInst::getInversePredicate(Pred); + + switch (Pred) { + case CmpInst::Predicate::ICMP_EQ: + Known2 = KnownBits::makeConstant(*RHSC); + break; + case CmpInst::Predicate::ICMP_ULE: + Known2.Zero.setHighBits(RHSC->countLeadingZeros()); + break; + case CmpInst::Predicate::ICMP_ULT: + Known2.Zero.setHighBits((*RHSC - 1).countLeadingZeros()); + break; + default: + // TODO - add additional integer predicate handling. + break; + } + } + } + } + Known = KnownBits::commonBits(Known, Known2); // If all bits have been ruled out, there's no need to check // more operands. 
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -3545,21 +3545,21 @@ // reference the block. It is possible that there is more than one label // here, because multiple LLVM BB's may have been RAUW'd to this block after // the references were generated. - const BasicBlock *BB = MBB.getBasicBlock(); - if (MBB.hasAddressTaken()) { + if (MBB.isIRBlockAddressTaken()) { if (isVerbose()) OutStreamer->AddComment("Block address taken"); - // MBBs can have their address taken as part of CodeGen without having - // their corresponding BB's address taken in IR - if (BB && BB->hasAddressTaken()) - for (MCSymbol *Sym : getAddrLabelSymbolToEmit(BB)) - OutStreamer->emitLabel(Sym); + BasicBlock *BB = MBB.getAddressTakenIRBlock(); + assert(BB && BB->hasAddressTaken() && "Missing BB"); + for (MCSymbol *Sym : getAddrLabelSymbolToEmit(BB)) + OutStreamer->emitLabel(Sym); + } else if (isVerbose() && MBB.isMachineBlockAddressTaken()) { + OutStreamer->AddComment("Block address taken"); } // Print some verbose block comments. if (isVerbose()) { - if (BB) { + if (const BasicBlock *BB = MBB.getBasicBlock()) { if (BB->hasName()) { BB->printAsOperand(OutStreamer->getCommentOS(), /*PrintType=*/false, BB->getModule()); diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -3350,11 +3350,13 @@ if (const auto *MemberDecl = dyn_cast_or_null( DIGV->getRawStaticDataMemberDeclaration())) Scope = MemberDecl->getScope(); - // For Fortran, the scoping portion is elided in its name so that we can - // reference the variable in the command line of the VS debugger. 
+ // For static local variables and Fortran, the scoping portion is elided + // in its name so that we can reference the variable in the command line + // of the VS debugger. std::string QualifiedName = - (moduleIsInFortran()) ? std::string(DIGV->getName()) - : getFullyQualifiedName(Scope, DIGV->getName()); + (moduleIsInFortran() || isa(Scope)) + ? std::string(DIGV->getName()) + : getFullyQualifiedName(Scope, DIGV->getName()); if (const GlobalVariable *GV = CVGV.GVInfo.dyn_cast()) { diff --git a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp --- a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -663,9 +663,10 @@ Asm->OutStreamer->emitLabel(CSRange.ExceptionLabel); // Emit the LSDA header. - // If only one call-site range exists, LPStart is omitted as it is the - // same as the function entry. - if (CallSiteRanges.size() == 1) { + // LPStart is omitted if either we have a single call-site range (in which + // case the function entry is treated as @LPStart) or if this function has + // no landing pads (in which case @LPStart is undefined). + if (CallSiteRanges.size() == 1 || LandingPadRange == nullptr) { Asm->emitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart"); } else if (!Asm->isPositionIndependent()) { // For more than one call-site ranges, LPStart must be explicitly diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -7754,14 +7754,14 @@ return false; // Check that GEP is used outside the block, meaning it's alive on the // IndirectBr edge(s). - if (find_if(GEPI->users(), [&](User *Usr) { + if (llvm::none_of(GEPI->users(), [&](User *Usr) { if (auto *I = dyn_cast(Usr)) { if (I->getParent() != SrcBlock) { return true; } } return false; - }) == GEPI->users().end()) + })) return false; // The second elements of the GEP chains to be unmerged. 
std::vector UGEPIs; diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -3437,7 +3437,7 @@ MF->push_back(MBB); if (BB.hasAddressTaken()) - MBB->setHasAddressTaken(); + MBB->setAddressTakenIRBlock(const_cast(&BB)); if (!HasMustTailInVarArgFn) HasMustTailInVarArgFn = checkForMustTailInVarArgFn(IsVarArg, BB); diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp --- a/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -134,9 +134,11 @@ return false; // Check that subrange is live at UseIdx. - if (MO.getSubReg()) { + if (li.hasSubRanges()) { const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); - LaneBitmask LM = TRI->getSubRegIndexLaneMask(MO.getSubReg()); + unsigned SubReg = MO.getSubReg(); + LaneBitmask LM = SubReg ? TRI->getSubRegIndexLaneMask(SubReg) + : MRI.getMaxLaneMaskForVReg(MO.getReg()); for (LiveInterval::SubRange &SR : li.subranges()) { if ((SR.LaneMask & LM).none()) continue; diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h --- a/llvm/lib/CodeGen/MIRParser/MILexer.h +++ b/llvm/lib/CodeGen/MIRParser/MILexer.h @@ -114,7 +114,6 @@ kw_call_entry, kw_custom, kw_liveout, - kw_address_taken, kw_landing_pad, kw_inlineasm_br_indirect_target, kw_ehfunclet_entry, @@ -129,6 +128,8 @@ kw_bbsections, kw_unknown_size, kw_unknown_address, + kw_ir_block_address_taken, + kw_machine_block_address_taken, // Metadata types. 
kw_distinct, diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp --- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp +++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp @@ -258,7 +258,6 @@ .Case("call-entry", MIToken::kw_call_entry) .Case("custom", MIToken::kw_custom) .Case("liveout", MIToken::kw_liveout) - .Case("address-taken", MIToken::kw_address_taken) .Case("landing-pad", MIToken::kw_landing_pad) .Case("inlineasm-br-indirect-target", MIToken::kw_inlineasm_br_indirect_target) @@ -275,6 +274,8 @@ .Case("unknown-size", MIToken::kw_unknown_size) .Case("unknown-address", MIToken::kw_unknown_address) .Case("distinct", MIToken::kw_distinct) + .Case("ir-block-address-taken", MIToken::kw_ir_block_address_taken) + .Case("machine-block-address-taken", MIToken::kw_machine_block_address_taken) .Default(MIToken::Identifier); } diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -495,6 +495,7 @@ MachineOperand &Dest, Optional &TiedDefIdx); bool parseOffset(int64_t &Offset); + bool parseIRBlockAddressTaken(BasicBlock *&BB); bool parseAlignment(uint64_t &Alignment); bool parseAddrspace(unsigned &Addrspace); bool parseSectionID(Optional &SID); @@ -669,7 +670,8 @@ auto Loc = Token.location(); auto Name = Token.stringValue(); lex(); - bool HasAddressTaken = false; + bool MachineBlockAddressTaken = false; + BasicBlock *AddressTakenIRBlock = nullptr; bool IsLandingPad = false; bool IsInlineAsmBrIndirectTarget = false; bool IsEHFuncletEntry = false; @@ -680,10 +682,14 @@ do { // TODO: Report an error when multiple same attributes are specified. 
switch (Token.kind()) { - case MIToken::kw_address_taken: - HasAddressTaken = true; + case MIToken::kw_machine_block_address_taken: + MachineBlockAddressTaken = true; lex(); break; + case MIToken::kw_ir_block_address_taken: + if (parseIRBlockAddressTaken(AddressTakenIRBlock)) + return true; + break; case MIToken::kw_landing_pad: IsLandingPad = true; lex(); @@ -701,6 +707,7 @@ return true; break; case MIToken::IRBlock: + case MIToken::NamedIRBlock: // TODO: Report an error when both name and ir block are specified. if (parseIRBlock(BB, MF.getFunction())) return true; @@ -736,8 +743,10 @@ Twine(ID)); if (Alignment) MBB->setAlignment(Align(Alignment)); - if (HasAddressTaken) - MBB->setHasAddressTaken(); + if (MachineBlockAddressTaken) + MBB->setMachineBlockAddressTaken(); + if (AddressTakenIRBlock) + MBB->setAddressTakenIRBlock(AddressTakenIRBlock); MBB->setIsEHPad(IsLandingPad); MBB->setIsInlineAsmBrIndirectTarget(IsInlineAsmBrIndirectTarget); MBB->setIsEHFuncletEntry(IsEHFuncletEntry); @@ -2918,6 +2927,19 @@ return false; } +bool MIParser::parseIRBlockAddressTaken(BasicBlock *&BB) { + assert(Token.is(MIToken::kw_ir_block_address_taken)); + lex(); + if (Token.isNot(MIToken::IRBlock) && Token.isNot(MIToken::NamedIRBlock)) + return error("expected basic block after 'ir_block_address_taken'"); + + if (parseIRBlock(BB, MF.getFunction())) + return true; + + lex(); + return false; +} + bool MIParser::parseAlignment(uint64_t &Alignment) { assert(Token.is(MIToken::kw_align) || Token.is(MIToken::kw_basealign)); lex(); diff --git a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp --- a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp +++ b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp @@ -193,12 +193,10 @@ "lowest stage of an interval in this LR") \ M(float, progress, {1}, "ratio of current queue size to initial size") -// The model learns to pick one of the mask == 1 interferences. This is the name -// of the output tensor. 
-// The contract with the model is that the output will be guaranteed to be to a -// mask == 1 position. -// Using a macro here to avoid 'not used' warnings (and keep cond compilation to -// a minimum) +// The model learns to pick one of the mask == 1 interferences. This is the +// name of the output tensor. The contract with the model is that the output +// will be guaranteed to be to a mask == 1 position. Using a macro here to +// avoid 'not used' warnings (and keep cond compilation to a minimum) #define DecisionName "index_to_evict" // Named features index. @@ -211,7 +209,8 @@ // The ML advisor will typically have a sparse input to the evaluator, because // various phys regs won't be available. It's easier (maintenance-wise) to -// bulk-reset the state of the evaluator each time we are about to use it again. +// bulk-reset the state of the evaluator each time we are about to use it +// again. template size_t getTotalSize(const std::vector &Shape) { size_t Ret = sizeof(T); for (const auto V : Shape) @@ -227,8 +226,8 @@ #undef _RESET } -// Per-live interval components that get aggregated into the feature values that -// will be passed to the evaluator. +// Per-live interval components that get aggregated into the feature values +// that will be passed to the evaluator. struct LIFeatureComponents { double R = 0; double W = 0; @@ -242,7 +241,8 @@ using CandidateRegList = std::array, NumberOfInterferences>; -using FeaturesListNormalizer = std::array; +using FeaturesListNormalizer = + llvm::SmallVector; /// The ML evictor (commonalities between release and development mode) class MLEvictAdvisor : public RegAllocEvictionAdvisor { @@ -260,10 +260,10 @@ // error, and we shouldn't be asking for it here. 
const MLModelRunner &getRunner() const { return *Runner; } - /// This just calls Evaluate on the Runner, but in the development mode case, - /// if we're just capturing the log of the default advisor, it needs to call - /// the latter instead, so we need to pass all the necessary parameters for - /// it. In the development case, it will also log. + /// This just calls Evaluate on the Runner, but in the development mode + /// case, if we're just capturing the log of the default advisor, it needs + /// to call the latter instead, so we need to pass all the necessary + /// parameters for it. In the development case, it will also log. virtual int64_t tryFindEvictionCandidatePosition(const LiveInterval &VirtReg, const AllocationOrder &Order, @@ -272,11 +272,11 @@ /// Load the features of the given VirtReg (allocated or not) at column Pos, /// but if that can't be evicted, return false instead. - bool - loadInterferenceFeatures(const LiveInterval &VirtReg, MCRegister PhysReg, - bool IsHint, const SmallVirtRegSet &FixedRegisters, - std::array &Largest, - size_t Pos) const; + bool loadInterferenceFeatures(const LiveInterval &VirtReg, MCRegister PhysReg, + bool IsHint, + const SmallVirtRegSet &FixedRegisters, + llvm::SmallVectorImpl &Largest, + size_t Pos) const; private: static float getInitialQueueSize(const MachineFunction &MF); @@ -287,11 +287,12 @@ const SmallVirtRegSet &FixedRegisters) const override; void extractFeatures(const SmallVectorImpl &Intervals, - std::array &Largest, - size_t Pos, int64_t IsHint, int64_t LocalIntfsCount, + llvm::SmallVectorImpl &Largest, size_t Pos, + int64_t IsHint, int64_t LocalIntfsCount, float NrUrgent) const; - // Point-in-time: we didn't learn this, so we always delegate to the default. + // Point-in-time: we didn't learn this, so we always delegate to the + // default. 
bool canEvictHintInterference( const LiveInterval &VirtReg, MCRegister PhysReg, const SmallVirtRegSet &FixedRegisters) const override { @@ -303,9 +304,9 @@ getLIFeatureComponents(const LiveInterval &LI) const; // Hold on to a default advisor for: - // 1) the implementation of canEvictHintInterference, because we didn't learn - // that nuance yet; - // 2) for bootstrapping (logging) in the development mode case. + // 1) the implementation of canEvictHintInterference, because we didn't + // learn that nuance yet; 2) for bootstrapping (logging) in the development + // mode case. const DefaultEvictionAdvisor DefaultAdvisor; MLModelRunner *const Runner; const MachineBlockFrequencyInfo &MBFI; @@ -323,10 +324,6 @@ #define _DECL_FEATURES(type, name, shape, _) \ TensorSpec::createSpec(#name, shape), -static const std::vector InputFeatures{ - {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)}, -}; -#undef _DECL_FEATURES // =================================== // Release (AOT) - specifics // =================================== @@ -334,13 +331,17 @@ : public RegAllocEvictionAdvisorAnalysis { public: ReleaseModeEvictionAdvisorAnalysis() - : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Release) {} + : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Release) { + InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)}; + } // support for isa<> and dyn_cast. static bool classof(const RegAllocEvictionAdvisorAnalysis *R) { return R->getAdvisorMode() == AdvisorMode::Release; } private: + std::vector InputFeatures; + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); @@ -370,19 +371,12 @@ static const TensorSpec Reward = TensorSpec::createSpec("reward", {1}); // Features we bind on the model. The tensor names have a prefix, and we also -// need to include some tensors that are expected to be present by the training -// algo. +// need to include some tensors that are expected to be present by the +// training algo. // TODO: can we just get rid of these? 
#define _DECL_TRAIN_FEATURES(type, name, shape, _) \ TensorSpec::createSpec(std::string("action_") + #name, shape), -static const std::vector TrainingInputFeatures{ - {RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES) - TensorSpec::createSpec("action_discount", {1}), - TensorSpec::createSpec("action_step_type", {1}), - TensorSpec::createSpec("action_reward", {1})}}; -#undef _DECL_TRAIN_FEATURES - class DevelopmentModeEvictAdvisor : public MLEvictAdvisor { public: DevelopmentModeEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA, @@ -404,7 +398,14 @@ : public RegAllocEvictionAdvisorAnalysis { public: DevelopmentModeEvictionAdvisorAnalysis() - : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Development) {} + : RegAllocEvictionAdvisorAnalysis(AdvisorMode::Development) { + InputFeatures = {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)}; + TrainingInputFeatures = { + RA_EVICT_FEATURES_LIST(_DECL_TRAIN_FEATURES) + TensorSpec::createSpec("action_discount", {1}), + TensorSpec::createSpec("action_step_type", {1}), + TensorSpec::createSpec("action_reward", {1})}; + } // support for isa<> and dyn_cast. static bool classof(const RegAllocEvictionAdvisorAnalysis *R) { return R->getAdvisorMode() == AdvisorMode::Development; @@ -420,6 +421,9 @@ } private: + std::vector InputFeatures; + std::vector TrainingInputFeatures; + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); @@ -486,6 +490,7 @@ std::unique_ptr Runner; StringMap> LogMap; }; + #endif //#ifdef LLVM_HAVE_TF_API } // namespace @@ -529,8 +534,8 @@ bool MLEvictAdvisor::loadInterferenceFeatures( const LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint, - const SmallVirtRegSet &FixedRegisters, FeaturesListNormalizer &Largest, - size_t Pos) const { + const SmallVirtRegSet &FixedRegisters, + llvm::SmallVectorImpl &Largest, size_t Pos) const { // It is only possible to evict virtual register interference. 
if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) { // leave unavailable @@ -547,8 +552,8 @@ SmallVector InterferingIntervals; for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); - // Different from the default heuristic, we don't make any assumptions about - // what having more than 10 results in the query may mean. + // Different from the default heuristic, we don't make any assumptions + // about what having more than 10 results in the query may mean. const auto &IFIntervals = Q.interferingVRegs(EvictInterferenceCutoff); if (IFIntervals.empty() && InterferingIntervals.empty()) continue; @@ -605,14 +610,14 @@ // max, then any of the costs of the legally-evictable intervals // would be lower. When that happens, one of those will be selected. // Therefore, we allow the candidate be selected, unless the candidate is - // unspillable, in which case it would be incorrect to not find a register for - // it. + // unspillable, in which case it would be incorrect to not find a register + // for it. const bool MustFindEviction = (!VirtReg.isSpillable() && CostPerUseLimit == static_cast(~0u)); // Number of available candidates - if 0, no need to continue. size_t Available = 0; - // Make sure we don't have leftover partial state from an attempt where we had - // no available candidates and bailed out early. + // Make sure we don't have leftover partial state from an attempt where we + // had no available candidates and bailed out early. resetInputs(*Runner); // Track the index->register mapping because AllocationOrder doesn't do that @@ -625,15 +630,13 @@ // only normalize (some of) the float features, but it's just simpler to // dimension 'Largest' to all the features, especially since we have the // 'DoNotNormalize' list. 
- FeaturesListNormalizer Largest; - Largest.fill(0.0); - - // Same overal idea as in the default eviction policy - we visit the values of - // AllocationOrder one at a time. If it's not legally available, we mask off - // the corresponding feature column (==do nothing because we already reset all - // the features to 0) - // Use Pos to capture the column we load features at - in AllocationOrder - // order. + FeaturesListNormalizer Largest(FeatureIDs::FeatureCount, 0.0); + + // Same overall idea as in the default eviction policy - we visit the values + // of AllocationOrder one at a time. If it's not legally available, we mask + // off the corresponding feature column (==do nothing because we already + // reset all the features to 0) Use Pos to capture the column we load + // features at - in AllocationOrder order. size_t Pos = 0; for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit); I != E; ++I, ++Pos) { @@ -660,7 +663,8 @@ Regs[CandidateVirtRegPos].second = !MustFindEviction; if (!MustFindEviction) extractFeatures(SmallVector(1, &VirtReg), Largest, - CandidateVirtRegPos, /*IsHint*/ 0, /*LocalIntfsCount*/ 0, + CandidateVirtRegPos, /*IsHint*/ 0, + /*LocalIntfsCount*/ 0, /*NrUrgent*/ 0.0); assert(InitialQSize > 0.0 && "We couldn't have gotten here if we had " "nothing to allocate initially."); @@ -747,8 +751,8 @@ // of accummulating the various features, we keep them separate. void MLEvictAdvisor::extractFeatures( const SmallVectorImpl &Intervals, - std::array &Largest, size_t Pos, - int64_t IsHint, int64_t LocalIntfsCount, float NrUrgent) const { + llvm::SmallVectorImpl &Largest, size_t Pos, int64_t IsHint, + int64_t LocalIntfsCount, float NrUrgent) const { int64_t NrDefsAndUses = 0; int64_t NrBrokenHints = 0; double R = 0.0; @@ -854,9 +858,9 @@ } else { MCRegister PhysReg = getDefaultAdvisor().tryFindEvictionCandidate( VirtReg, Order, CostPerUseLimit, FixedRegisters); - // Find the index of the selected PhysReg. 
We need it for logging, otherwise - // this is wasted cycles (but so would starting development mode without a - // model nor logging) + // Find the index of the selected PhysReg. We need it for logging, + // otherwise this is wasted cycles (but so would starting development mode + // without a model nor logging) if (!PhysReg) Ret = CandidateVirtRegPos; else diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -476,6 +476,28 @@ os << "bb." << getNumber(); bool hasAttributes = false; + auto PrintBBRef = [&](const BasicBlock *bb) { + os << "%ir-block."; + if (bb->hasName()) { + os << bb->getName(); + } else { + int slot = -1; + + if (moduleSlotTracker) { + slot = moduleSlotTracker->getLocalSlot(bb); + } else if (bb->getParent()) { + ModuleSlotTracker tmpTracker(bb->getModule(), false); + tmpTracker.incorporateFunction(*bb->getParent()); + slot = tmpTracker.getLocalSlot(bb); + } + + if (slot == -1) + os << ""; + else + os << slot; + } + }; + if (printNameFlags & PrintNameIr) { if (const auto *bb = getBasicBlock()) { if (bb->hasName()) { @@ -483,29 +505,21 @@ } else { hasAttributes = true; os << " ("; - - int slot = -1; - - if (moduleSlotTracker) { - slot = moduleSlotTracker->getLocalSlot(bb); - } else if (bb->getParent()) { - ModuleSlotTracker tmpTracker(bb->getModule(), false); - tmpTracker.incorporateFunction(*bb->getParent()); - slot = tmpTracker.getLocalSlot(bb); - } - - if (slot == -1) - os << ""; - else - os << (Twine("%ir-block.") + Twine(slot)).str(); + PrintBBRef(bb); } } } if (printNameFlags & PrintNameAttributes) { - if (hasAddressTaken()) { + if (isMachineBlockAddressTaken()) { + os << (hasAttributes ? ", " : " ("); + os << "machine-block-address-taken"; + hasAttributes = true; + } + if (isIRBlockAddressTaken()) { os << (hasAttributes ? 
", " : " ("); - os << "address-taken"; + os << "ir-block-address-taken "; + PrintBBRef(getAddressTakenIRBlock()); hasAttributes = true; } if (isEHPad()) { diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -632,6 +632,13 @@ } } + if (MBB->isIRBlockAddressTaken()) { + if (!MBB->getAddressTakenIRBlock()->hasAddressTaken()) + report("ir-block-address-taken is associated with basic block not used by " + "a blockaddress.", + MBB); + } + // Count the number of landing pad successors. SmallPtrSet LandingPadSuccs; for (const auto *succ : MBB->successors()) { diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -281,7 +281,6 @@ Register traceCopies(Register VirtReg) const; Register traceCopyChain(Register Reg) const; - bool shouldAllocateRegister(const Register Reg) const; int getStackSpaceFor(Register VirtReg); void spill(MachineBasicBlock::iterator Before, Register VirtReg, MCPhysReg AssignedReg, bool Kill, bool LiveOut); @@ -301,12 +300,6 @@ INITIALIZE_PASS(RegAllocFast, "regallocfast", "Fast Register Allocator", false, false) -bool RegAllocFast::shouldAllocateRegister(const Register Reg) const { - assert(Register::isVirtualRegister(Reg)); - const TargetRegisterClass &RC = *MRI->getRegClass(Reg); - return ShouldAllocateClass(*TRI, RC); -} - void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) { for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) RegUnitStates[*UI] = NewState; @@ -846,8 +839,6 @@ assert(MO.isUndef() && "expected undef use"); Register VirtReg = MO.getReg(); assert(Register::isVirtualRegister(VirtReg) && "Expected virtreg"); - if (!shouldAllocateRegister(VirtReg)) - return; LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg); MCPhysReg PhysReg; @@ -873,8 +864,6 @@ /// (tied or 
earlyclobber) that may interfere with preassigned uses. void RegAllocFast::defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg) { - if (!shouldAllocateRegister(VirtReg)) - return; LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); if (LRI != LiveVirtRegs.end()) { MCPhysReg PrevReg = LRI->PhysReg; @@ -908,8 +897,6 @@ void RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg, bool LookAtPhysRegUses) { assert(VirtReg.isVirtual() && "Not a virtual register"); - if (!shouldAllocateRegister(VirtReg)) - return; MachineOperand &MO = MI.getOperand(OpNum); LiveRegMap::iterator LRI; bool New; @@ -960,8 +947,6 @@ void RegAllocFast::useVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg) { assert(VirtReg.isVirtual() && "Not a virtual register"); - if (!shouldAllocateRegister(VirtReg)) - return; MachineOperand &MO = MI.getOperand(OpNum); LiveRegMap::iterator LRI; bool New; @@ -986,13 +971,8 @@ Register Hint; if (MI.isCopy() && MI.getOperand(1).getSubReg() == 0) { Hint = MI.getOperand(0).getReg(); - if (Hint.isVirtual()) { - assert(!shouldAllocateRegister(Hint)); - Hint = Register(); - } else { - assert(Hint.isPhysical() && - "Copy destination should already be assigned"); - } + assert(Hint.isPhysical() && + "Copy destination should already be assigned"); } allocVirtReg(MI, *LRI, Hint, false); if (LRI->Error) { @@ -1100,8 +1080,6 @@ assert(RegClassDefCounts.size() == TRI->getNumRegClasses()); if (Reg.isVirtual()) { - if (!shouldAllocateRegister(Reg)) - return; const TargetRegisterClass *OpRC = MRI->getRegClass(Reg); for (unsigned RCIdx = 0, RCIdxEnd = TRI->getNumRegClasses(); RCIdx != RCIdxEnd; ++RCIdx) { @@ -1161,8 +1139,6 @@ if (MO.isReg()) { Register Reg = MO.getReg(); if (Reg.isVirtual()) { - if (!shouldAllocateRegister(Reg)) - continue; if (MO.isDef()) { HasDef = true; HasVRegDef = true; @@ -1226,7 +1202,7 @@ } if (MO.isDef()) { - if (Reg.isVirtual() && shouldAllocateRegister(Reg)) + if (Reg.isVirtual()) 
DefOperandIndexes.push_back(I); addRegClassDefCounts(RegClassDefCounts, Reg); @@ -1316,10 +1292,6 @@ Register Reg = MO.getReg(); if (!Reg) continue; - if (Reg.isVirtual()) { - assert(!shouldAllocateRegister(Reg)); - continue; - } assert(Reg.isPhysical()); if (MRI->isReserved(Reg)) continue; @@ -1366,7 +1338,7 @@ if (!MO.isReg() || !MO.isUse()) continue; Register Reg = MO.getReg(); - if (!Reg.isVirtual() || !shouldAllocateRegister(Reg)) + if (!Reg.isVirtual()) continue; if (MO.isUndef()) { @@ -1393,7 +1365,7 @@ if (!MO.isReg() || !MO.isUse()) continue; Register Reg = MO.getReg(); - if (!Reg.isVirtual() || !shouldAllocateRegister(Reg)) + if (!Reg.isVirtual()) continue; assert(MO.isUndef() && "Should only have undef virtreg uses left"); @@ -1416,10 +1388,6 @@ Register Reg = MO.getReg(); if (!Reg) continue; - if (Reg.isVirtual()) { - assert(!shouldAllocateRegister(Reg)); - continue; - } assert(Reg.isPhysical() && "should have register assigned"); // We sometimes get odd situations like: @@ -1449,8 +1417,6 @@ for (Register Reg : MI.getUsedDebugRegs()) { if (!Register::isVirtualRegister(Reg)) continue; - if (!shouldAllocateRegister(Reg)) - continue; // Already spilled to a stackslot? 
int SS = StackSlotForVirtReg[Reg]; @@ -1491,7 +1457,7 @@ continue; Register Reg = MO.getReg(); - if (!Reg.isVirtual() || !shouldAllocateRegister(Reg)) + if (!Reg.isVirtual()) continue; DenseMap::iterator DI; diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -13213,6 +13213,26 @@ return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1); } + // Fold (iM_signext_inreg + // (extract_subvector (zext|anyext|sext iN_v to _) _) + // from iN) + // -> (extract_subvector (signext iN_v to iM)) + if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && + ISD::isExtOpcode(N0.getOperand(0).getOpcode())) { + SDValue InnerExt = N0.getOperand(0); + EVT InnerExtVT = InnerExt->getValueType(0); + SDValue Extendee = InnerExt->getOperand(0); + + if (ExtVTBits == Extendee.getValueType().getScalarSizeInBits() && + (!LegalOperations || + TLI.isOperationLegal(ISD::SIGN_EXTEND, InnerExtVT))) { + SDValue SignExtExtendee = + DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), InnerExtVT, Extendee); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, SignExtExtendee, + N0.getOperand(1)); + } + } + return SDValue(); } @@ -14677,8 +14697,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0); - bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1); + SDNode *N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0); + SDNode *N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1); EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; @@ -14775,8 +14795,10 @@ // of rounding steps. 
if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) { if (N0.getOpcode() == ISD::FMUL) { - bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); - bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)); + SDNode *CFP00 = + DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); + SDNode *CFP01 = + DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)); // (fadd (fmul x, c), x) -> (fmul x, c+1) if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { @@ -14796,8 +14818,10 @@ } if (N1.getOpcode() == ISD::FMUL) { - bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); - bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1)); + SDNode *CFP10 = + DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); + SDNode *CFP11 = + DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1)); // (fadd x, (fmul x, c)) -> (fmul x, c+1) if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { @@ -14817,7 +14841,8 @@ } if (N0.getOpcode() == ISD::FADD) { - bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); + SDNode *CFP00 = + DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); // (fadd (fadd x, x), x) -> (fmul x, 3.0) if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) && (N0.getOperand(0) == N1)) { @@ -14827,7 +14852,8 @@ } if (N1.getOpcode() == ISD::FADD) { - bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); + SDNode *CFP10 = + DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); // (fadd x, (fadd x, x)) -> (fmul x, 3.0) if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && N1.getOperand(0) == N0) { @@ -22846,25 +22872,31 @@ SDLoc DL(N); EVT IntVT = VT.changeVectorElementTypeToInteger(); EVT IntSVT = VT.getVectorElementType().changeTypeToInteger(); - IntSVT = TLI.getTypeToTransformTo(*DAG.getContext(), IntSVT); - SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT); - SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT); - SmallVector AndMask(NumElts, 
DAG.getUNDEF(IntSVT)); - for (int I = 0; I != (int)NumElts; ++I) - if (0 <= Mask[I]) - AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt; - - // See if a clear mask is legal instead of going via - // XformToShuffleWithZero which loses UNDEF mask elements. - if (TLI.isVectorClearMaskLegal(ClearMask, IntVT)) - return DAG.getBitcast( - VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0), - DAG.getConstant(0, DL, IntVT), ClearMask)); - - if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT)) - return DAG.getBitcast( - VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0), - DAG.getBuildVector(IntVT, DL, AndMask))); + // Transform the type to a legal type so that the buildvector constant + // elements are not illegal. Make sure that the result is larger than the + // original type, in case the value is split into two (e.g. i64->i32). + if (!TLI.isTypeLegal(IntSVT) && LegalTypes) + IntSVT = TLI.getTypeToTransformTo(*DAG.getContext(), IntSVT); + if (IntSVT.getSizeInBits() >= IntVT.getScalarSizeInBits()) { + SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT); + SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT); + SmallVector AndMask(NumElts, DAG.getUNDEF(IntSVT)); + for (int I = 0; I != (int)NumElts; ++I) + if (0 <= Mask[I]) + AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt; + + // See if a clear mask is legal instead of going via + // XformToShuffleWithZero which loses UNDEF mask elements. 
+ if (TLI.isVectorClearMaskLegal(ClearMask, IntVT)) + return DAG.getBitcast( + VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0), + DAG.getConstant(0, DL, IntVT), ClearMask)); + + if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT)) + return DAG.getBitcast( + VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0), + DAG.getBuildVector(IntVT, DL, AndMask))); + } } } diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp --- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -270,7 +270,7 @@ // be multiple MachineBasicBlocks corresponding to one BasicBlock, and only // the first one should be marked. if (BB.hasAddressTaken()) - MBB->setHasAddressTaken(); + MBB->setAddressTakenIRBlock(const_cast(&BB)); // Mark landing pad blocks. if (BB.isEHPad()) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4508,6 +4508,9 @@ return true; switch (Opcode) { + case ISD::VALUETYPE: + return true; + case ISD::UNDEF: return PoisonOnly; @@ -4564,6 +4567,8 @@ unsigned Opcode = Op.getOpcode(); switch (Opcode) { + case ISD::AssertSext: + case ISD::AssertZext: case ISD::FREEZE: case ISD::AND: case ISD::OR: @@ -4575,6 +4580,7 @@ case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::TRUNCATE: + case ISD::SIGN_EXTEND_INREG: case ISD::BITCAST: return false; @@ -5927,11 +5933,11 @@ // Canonicalize: // binop(const, nonconst) -> binop(nonconst, const) - bool IsN1C = isConstantIntBuildVectorOrConstantInt(N1); - bool IsN2C = isConstantIntBuildVectorOrConstantInt(N2); - bool IsN1CFP = isConstantFPBuildVectorOrConstantFP(N1); - bool IsN2CFP = isConstantFPBuildVectorOrConstantFP(N2); - if ((IsN1C && !IsN2C) || (IsN1CFP && !IsN2CFP)) + SDNode *N1C = 
isConstantIntBuildVectorOrConstantInt(N1); + SDNode *N2C = isConstantIntBuildVectorOrConstantInt(N2); + SDNode *N1CFP = isConstantFPBuildVectorOrConstantFP(N1); + SDNode *N2CFP = isConstantFPBuildVectorOrConstantFP(N2); + if ((N1C && !N2C) || (N1CFP && !N2CFP)) std::swap(N1, N2); // Canonicalize: diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3007,7 +3007,7 @@ BasicBlock *Dest = I.getIndirectDest(i); MachineBasicBlock *Target = FuncInfo.MBBMap[Dest]; Target->setIsInlineAsmBrIndirectTarget(); - Target->setHasAddressTaken(); + Target->setMachineBlockAddressTaken(); // Don't add duplicate machine successors. if (Dests.insert(Dest).second) addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero()); @@ -4733,7 +4733,8 @@ EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getValueOperand()->getType()); - if (I.getAlign().value() < MemVT.getSizeInBits() / 8) + if (!TLI.supportsUnalignedAtomics() && + I.getAlign().value() < MemVT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic store"); auto Flags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout()); diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -1889,11 +1889,24 @@ // string that sorts between .CRT$XCA and .CRT$XCU. In the general case, we // make a name like ".CRT$XCT12345", since that runs before .CRT$XCU. Really // low priorities need to sort before 'L', since the CRT uses that - // internally, so we use ".CRT$XCA00001" for them. + // internally, so we use ".CRT$XCA00001" for them. 
We have a contract with + // the frontend that "init_seg(compiler)" corresponds to priority 200 and + // "init_seg(lib)" corresponds to priority 400, and those respectively use + // 'C' and 'L' without the priority suffix. Priorities between 200 and 400 + // use 'C' with the priority as a suffix. SmallString<24> Name; + char LastLetter = 'T'; + bool AddPrioritySuffix = Priority != 200 && Priority != 400; + if (Priority < 200) + LastLetter = 'A'; + else if (Priority < 400) + LastLetter = 'C'; + else if (Priority == 400) + LastLetter = 'L'; raw_svector_ostream OS(Name); - OS << ".CRT$X" << (IsCtor ? "C" : "T") << - (Priority < 200 ? 'A' : 'T') << format("%05u", Priority); + OS << ".CRT$X" << (IsCtor ? "C" : "T") << LastLetter; + if (AddPrioritySuffix) + OS << format("%05u", Priority); MCSectionCOFF *Sec = Ctx.getCOFFSection( Name, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, SectionKind::getReadOnly()); diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp --- a/llvm/lib/CodeGen/TypePromotion.cpp +++ b/llvm/lib/CodeGen/TypePromotion.cpp @@ -947,7 +947,7 @@ continue; if (isa(&I) && isa(I.getOperand(0)) && - BBIsInLoop(&BB)) { + isa(I.getType()) && BBIsInLoop(&BB)) { LLVM_DEBUG(dbgs() << "IR Promotion: Searching from: " << I.getOperand(0) << "\n"); EVT ZExtVT = TLI->getValueType(DL, I.getType()); diff --git a/llvm/lib/Debuginfod/Debuginfod.cpp b/llvm/lib/Debuginfod/Debuginfod.cpp --- a/llvm/lib/Debuginfod/Debuginfod.cpp +++ b/llvm/lib/Debuginfod/Debuginfod.cpp @@ -40,6 +40,7 @@ #include "llvm/Support/xxhash.h" #include +#include namespace llvm { static std::string uniqueKey(llvm::StringRef S) { return utostr(xxHash64(S)); } diff --git a/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt b/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt --- a/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt +++ b/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt @@ -22,6 +22,7 @@ ELF.cpp ELFLinkGraphBuilder.cpp ELF_aarch64.cpp + ELF_i386.cpp 
ELF_riscv.cpp ELF_x86_64.cpp @@ -33,6 +34,7 @@ # Architectures: aarch64.cpp + i386.cpp riscv.cpp x86_64.cpp diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF.cpp --- a/llvm/lib/ExecutionEngine/JITLink/ELF.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF.cpp @@ -14,6 +14,7 @@ #include "llvm/BinaryFormat/ELF.h" #include "llvm/ExecutionEngine/JITLink/ELF_aarch64.h" +#include "llvm/ExecutionEngine/JITLink/ELF_i386.h" #include "llvm/ExecutionEngine/JITLink/ELF_riscv.h" #include "llvm/ExecutionEngine/JITLink/ELF_x86_64.h" #include "llvm/Object/ELF.h" @@ -71,6 +72,8 @@ return createLinkGraphFromELFObject_riscv(ObjectBuffer); case ELF::EM_X86_64: return createLinkGraphFromELFObject_x86_64(ObjectBuffer); + case ELF::EM_386: + return createLinkGraphFromELFObject_i386(ObjectBuffer); default: return make_error( "Unsupported target machine architecture in ELF object " + @@ -91,6 +94,9 @@ case Triple::x86_64: link_ELF_x86_64(std::move(G), std::move(Ctx)); return; + case Triple::x86: + link_ELF_i386(std::move(G), std::move(Ctx)); + return; default: Ctx->notifyFailed(make_error( "Unsupported target machine architecture in ELF link graph " + diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_i386.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_i386.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_i386.cpp @@ -0,0 +1,114 @@ +//===----- ELF_i386.cpp - JIT linker implementation for ELF/i386 ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// ELF/i386 jit-link implementation. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/JITLink/ELF_i386.h" +#include "ELFLinkGraphBuilder.h" +#include "JITLinkGeneric.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/ExecutionEngine/JITLink/i386.h" +#include "llvm/Object/ELFObjectFile.h" + +#define DEBUG_TYPE "jitlink" + +using namespace llvm; +using namespace llvm::jitlink; + +namespace llvm { +namespace jitlink { + +class ELFJITLinker_i386 : public JITLinker { + friend class JITLinker; + +public: + ELFJITLinker_i386(std::unique_ptr Ctx, + std::unique_ptr G, PassConfiguration PassConfig) + : JITLinker(std::move(Ctx), std::move(G), std::move(PassConfig)) {} + +private: + Error applyFixup(LinkGraph &G, Block &B, const Edge &E) const { + using namespace i386; + using namespace llvm::support; + + switch (E.getKind()) { + case i386::None: { + break; + } + } + return Error::success(); + } +}; + +template +class ELFLinkGraphBuilder_i386 : public ELFLinkGraphBuilder { +private: + static Expected getRelocationKind(const uint32_t Type) { + using namespace i386; + switch (Type) { + case ELF::R_386_NONE: + return EdgeKind_i386::None; + } + + return make_error("Unsupported i386 relocation:" + + formatv("{0:d}", Type)); + } + + Error addRelocations() override { + LLVM_DEBUG(dbgs() << "Adding relocations\n"); + return Error::success(); + } + +public: + ELFLinkGraphBuilder_i386(StringRef FileName, const object::ELFFile &Obj, + const Triple T) + : ELFLinkGraphBuilder(Obj, std::move(T), FileName, + i386::getEdgeKindName) {} +}; + +Expected> +createLinkGraphFromELFObject_i386(MemoryBufferRef ObjectBuffer) { + LLVM_DEBUG({ + dbgs() << "Building jitlink graph for new input " + << ObjectBuffer.getBufferIdentifier() << "...\n"; + }); + + auto ELFObj = object::ObjectFile::createELFObjectFile(ObjectBuffer); + if (!ELFObj) + return ELFObj.takeError(); + + assert((*ELFObj)->getArch() == Triple::x86 && + "Only i386 (little endian) is supported for now"); 
+ + auto &ELFObjFile = cast>(**ELFObj); + return ELFLinkGraphBuilder_i386((*ELFObj)->getFileName(), + ELFObjFile.getELFFile(), + (*ELFObj)->makeTriple()) + .buildGraph(); +} + +void link_ELF_i386(std::unique_ptr G, + std::unique_ptr Ctx) { + PassConfiguration Config; + const Triple &TT = G->getTargetTriple(); + if (Ctx->shouldAddDefaultTargetPasses(TT)) { + if (auto MarkLive = Ctx->getMarkLivePass(TT)) + Config.PrePrunePasses.push_back(std::move(MarkLive)); + else + Config.PrePrunePasses.push_back(markAllSymbolsLive); + } + if (auto Err = Ctx->modifyPassConfig(*G, Config)) + return Ctx->notifyFailed(std::move(Err)); + + ELFJITLinker_i386::link(std::move(Ctx), std::move(G), std::move(Config)); +} + +} // namespace jitlink +} // namespace llvm \ No newline at end of file diff --git a/llvm/lib/ExecutionEngine/JITLink/i386.cpp b/llvm/lib/ExecutionEngine/JITLink/i386.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/ExecutionEngine/JITLink/i386.cpp @@ -0,0 +1,30 @@ +//===---- i386.cpp - Generic JITLink i386 edge kinds, utilities -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Generic utilities for graphs representing i386 objects. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/JITLink/i386.h" + +#define DEBUG_TYPE "jitlink" + +namespace llvm { +namespace jitlink { +namespace i386 { + +const char *getEdgeKindName(Edge::Kind K) { + switch (K) { + case None: + return "None"; + } + return getGenericEdgeKindName(K); +} +} // namespace i386 +} // namespace jitlink +} // namespace llvm \ No newline at end of file diff --git a/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp --- a/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ExecutionEngine/Orc/ExecutionUtils.h" +#include "llvm/ExecutionEngine/JITLink/x86_64.h" #include "llvm/ExecutionEngine/Orc/Layer.h" #include "llvm/ExecutionEngine/Orc/ObjectFileInterface.h" #include "llvm/IR/Constants.h" @@ -350,7 +351,6 @@ Error StaticLibraryDefinitionGenerator::tryToGenerate( LookupState &LS, LookupKind K, JITDylib &JD, JITDylibLookupFlags JDLookupFlags, const SymbolLookupSet &Symbols) { - // Don't materialize symbols from static archives unless this is a static // lookup. 
if (K != LookupKind::Static) @@ -430,5 +430,121 @@ Err = buildObjectFilesMap(); } +std::unique_ptr +DLLImportDefinitionGenerator::Create(ExecutionSession &ES, + ObjectLinkingLayer &L) { + return std::unique_ptr( + new DLLImportDefinitionGenerator(ES, L)); +} + +Error DLLImportDefinitionGenerator::tryToGenerate( + LookupState &LS, LookupKind K, JITDylib &JD, + JITDylibLookupFlags JDLookupFlags, const SymbolLookupSet &Symbols) { + JITDylibSearchOrder LinkOrder; + JD.withLinkOrderDo([&](const JITDylibSearchOrder &LO) { + LinkOrder.reserve(LO.size()); + for (auto &KV : LO) { + if (KV.first == &JD) + continue; + LinkOrder.push_back(KV); + } + }); + + // FIXME: if regular symbol name start with __imp_ we have to issue lookup of + // both __imp_ and stripped name and use the lookup information to resolve the + // real symbol name. + SymbolLookupSet LookupSet; + DenseMap ToLookUpSymbols; + for (auto &KV : Symbols) { + StringRef Deinterned = *KV.first; + if (Deinterned.startswith(getImpPrefix())) + Deinterned = Deinterned.drop_front(StringRef(getImpPrefix()).size()); + // Don't degrade the required state + if (ToLookUpSymbols.count(Deinterned) && + ToLookUpSymbols[Deinterned] == SymbolLookupFlags::RequiredSymbol) + continue; + ToLookUpSymbols[Deinterned] = KV.second; + } + + for (auto &KV : ToLookUpSymbols) + LookupSet.add(ES.intern(KV.first), KV.second); + + auto Resolved = + ES.lookup(LinkOrder, LookupSet, LookupKind::DLSym, SymbolState::Resolved); + if (!Resolved) + return Resolved.takeError(); + + auto G = createStubsGraph(*Resolved); + if (!G) + return G.takeError(); + return L.add(JD, std::move(*G)); +} + +Expected +DLLImportDefinitionGenerator::getTargetPointerSize(const Triple &TT) { + switch (TT.getArch()) { + case Triple::x86_64: + return 8; + default: + return make_error( + "architecture unsupported by DLLImportDefinitionGenerator", + inconvertibleErrorCode()); + } +} + +Expected +DLLImportDefinitionGenerator::getTargetEndianness(const Triple &TT) { + switch 
(TT.getArch()) { + case Triple::x86_64: + return support::endianness::little; + default: + return make_error( + "architecture unsupported by DLLImportDefinitionGenerator", + inconvertibleErrorCode()); + } +} + +Expected> +DLLImportDefinitionGenerator::createStubsGraph(const SymbolMap &Resolved) { + Triple TT = ES.getExecutorProcessControl().getTargetTriple(); + auto PointerSize = getTargetEndianness(TT); + if (!PointerSize) + return PointerSize.takeError(); + auto Endianness = getTargetEndianness(TT); + if (!Endianness) + return Endianness.takeError(); + + auto G = std::make_unique( + "", TT, *PointerSize, *Endianness, + jitlink::getGenericEdgeKindName); + jitlink::Section &Sec = G->createSection( + getSectionName(), jitlink::MemProt::Read | jitlink::MemProt::Exec); + + for (auto &KV : Resolved) { + jitlink::Symbol &Target = G->addAbsoluteSymbol( + *KV.first, ExecutorAddr(KV.second.getAddress()), *PointerSize, + jitlink::Linkage::Strong, jitlink::Scope::Local, false); + + // Create __imp_ symbol + jitlink::Symbol &Ptr = + jitlink::x86_64::createAnonymousPointer(*G, Sec, &Target); + auto NameCopy = G->allocateString(Twine(getImpPrefix()) + *KV.first); + StringRef NameCopyRef = StringRef(NameCopy.data(), NameCopy.size()); + Ptr.setName(NameCopyRef); + Ptr.setLinkage(jitlink::Linkage::Strong); + Ptr.setScope(jitlink::Scope::Default); + + // Create PLT stub + // FIXME: check PLT stub of data symbol is not accessed + jitlink::Block &StubBlock = + jitlink::x86_64::createPointerJumpStubBlock(*G, Sec, Ptr); + G->addDefinedSymbol(StubBlock, 0, *KV.first, StubBlock.getSize(), + jitlink::Linkage::Strong, jitlink::Scope::Default, true, + false); + } + + return std::move(G); +} + } // End namespace orc. } // End namespace llvm. diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -412,21 +412,20 @@ // MDNode. 
This loop also initializes DILocationReachable, later // needed by updateLoopMetadataDebugLocationsImpl; the use of // count_if avoids an early exit. - if (!std::count_if(N->op_begin() + 1, N->op_end(), - [&Visited, &DILocationReachable](const MDOperand &Op) { - return isDILocationReachable( - Visited, DILocationReachable, Op.get()); - })) + if (llvm::none_of(llvm::drop_begin(N->operands()), + [&Visited, &DILocationReachable](const MDOperand &Op) { + return isDILocationReachable(Visited, DILocationReachable, + Op.get()); + })) return N; // If there is only the debug location without any actual loop metadata, we // can remove the metadata. - if (std::all_of( - N->op_begin() + 1, N->op_end(), - [&Visited, &DILocationReachable](const MDOperand &Op) { - return isDILocationReachable(Visited, DILocationReachable, - Op.get()); - })) + if (llvm::all_of(llvm::drop_begin(N->operands()), + [&Visited, &DILocationReachable](const MDOperand &Op) { + return isDILocationReachable(Visited, DILocationReachable, + Op.get()); + })) return nullptr; return updateLoopMetadataDebugLocationsImpl( diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp --- a/llvm/lib/MC/MCContext.cpp +++ b/llvm/lib/MC/MCContext.cpp @@ -869,7 +869,7 @@ // Remap compilation directory. remapDebugPath(CompilationDir); - // Remap MCDwarfDirs in all compilation units. + // Remap MCDwarfDirs and RootFile.Name in all compilation units. SmallString<256> P; for (auto &CUIDTablePair : MCDwarfLineTablesCUMap) { for (auto &Dir : CUIDTablePair.second.getMCDwarfDirs()) { @@ -877,6 +877,12 @@ remapDebugPath(P); Dir = std::string(P); } + + // Used by DW_TAG_compile_unit's DT_AT_name and DW_TAG_label's + // DW_AT_decl_file for DWARF v5 generated for assembly source. 
+ P = CUIDTablePair.second.getRootFile().Name; + remapDebugPath(P); + CUIDTablePair.second.getRootFile().Name = std::string(P); } } diff --git a/llvm/lib/Support/ARMAttributeParser.cpp b/llvm/lib/Support/ARMAttributeParser.cpp --- a/llvm/lib/Support/ARMAttributeParser.cpp +++ b/llvm/lib/Support/ARMAttributeParser.cpp @@ -9,6 +9,8 @@ #include "llvm/Support/ARMAttributeParser.h" #include "llvm/ADT/STLArrayExtras.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Support/ARMBuildAttributes.h" +#include "llvm/Support/Errc.h" #include "llvm/Support/ScopedPrinter.h" using namespace llvm; @@ -62,6 +64,7 @@ ATTRIBUTE_HANDLER(PACRET_use), ATTRIBUTE_HANDLER(BTI_use), ATTRIBUTE_HANDLER(nodefaults), + ATTRIBUTE_HANDLER(also_compatible_with), }; #undef ATTRIBUTE_HANDLER @@ -81,15 +84,15 @@ return Error::success(); } +static const char *CPU_arch_strings[] = { + "Pre-v4", "ARM v4", "ARM v4T", "ARM v5T", "ARM v5TE", "ARM v5TEJ", + "ARM v6", "ARM v6KZ", "ARM v6T2", "ARM v6K", "ARM v7", "ARM v6-M", + "ARM v6S-M", "ARM v7E-M", "ARM v8-A", "ARM v8-R", "ARM v8-M Baseline", + "ARM v8-M Mainline", nullptr, nullptr, nullptr, "ARM v8.1-M Mainline", + "ARM v9-A"}; + Error ARMAttributeParser::CPU_arch(AttrType tag) { - static const char *strings[] = { - "Pre-v4", "ARM v4", "ARM v4T", "ARM v5T", "ARM v5TE", "ARM v5TEJ", "ARM v6", - "ARM v6KZ", "ARM v6T2", "ARM v6K", "ARM v7", "ARM v6-M", "ARM v6S-M", - "ARM v7E-M", "ARM v8-A", "ARM v8-R", - "ARM v8-M Baseline", "ARM v8-M Mainline", nullptr, nullptr, nullptr, - "ARM v8.1-M Mainline", "ARM v9-A" - }; - return parseStringAttribute("CPU_arch", tag, makeArrayRef(strings)); + return parseStringAttribute("CPU_arch", tag, makeArrayRef(CPU_arch_strings)); } Error ARMAttributeParser::CPU_arch_profile(AttrType tag) { @@ -380,6 +383,84 @@ return Error::success(); } +Error ARMAttributeParser::also_compatible_with(AttrType tag) { + // Parse value as a C string first in order to print it in escaped form later. 
+ // Then, parse it again to catch errors or to pretty print if Tag_CPU_arch. + Optional returnValue; + + SmallString<8> Description; + raw_svector_ostream DescStream(Description); + + uint64_t InitialOffset = cursor.tell(); + StringRef RawStringValue = de.getCStrRef(cursor); + uint64_t FinalOffset = cursor.tell(); + cursor.seek(InitialOffset); + uint64_t InnerTag = de.getULEB128(cursor); + + bool ValidInnerTag = + any_of(tagToStringMap, [InnerTag](const TagNameItem &Item) { + return Item.attr == InnerTag; + }); + + if (!ValidInnerTag) { + returnValue = + createStringError(errc::argument_out_of_domain, + Twine(InnerTag) + " is not a valid tag number"); + } else { + switch (InnerTag) { + case ARMBuildAttrs::CPU_arch: { + uint64_t InnerValue = de.getULEB128(cursor); + auto strings = makeArrayRef(CPU_arch_strings); + if (InnerValue >= strings.size()) { + returnValue = createStringError( + errc::argument_out_of_domain, + Twine(InnerValue) + " is not a valid " + + ELFAttrs::attrTypeAsString(InnerTag, tagToStringMap) + + " value"); + } else { + DescStream << ELFAttrs::attrTypeAsString(InnerTag, tagToStringMap) + << " = " << InnerValue; + if (strings[InnerValue]) + DescStream << " (" << strings[InnerValue] << ')'; + } + break; + } + case ARMBuildAttrs::also_compatible_with: + returnValue = createStringError( + errc::invalid_argument, + ELFAttrs::attrTypeAsString(InnerTag, tagToStringMap) + + " cannot be recursively defined"); + break; + case ARMBuildAttrs::CPU_raw_name: + case ARMBuildAttrs::CPU_name: + case ARMBuildAttrs::compatibility: + case ARMBuildAttrs::conformance: { + StringRef InnerValue = de.getCStrRef(cursor); + DescStream << ELFAttrs::attrTypeAsString(InnerTag, tagToStringMap) + << " = " << InnerValue; + break; + } + default: { + uint64_t InnerValue = de.getULEB128(cursor); + DescStream << ELFAttrs::attrTypeAsString(InnerTag, tagToStringMap) + << " = " << InnerValue; + } + } + } + + DictScope scope(*sw, "Attribute"); + sw->printNumber("Tag", tag); + 
sw->printString("TagName", + ELFAttrs::attrTypeAsString(tag, tagToStringMap, false)); + sw->printStringEscaped("Value", RawStringValue); + if (!Description.empty()) { + sw->printString("Description", Description); + } + cursor.seek(FinalOffset); + + return returnValue ? std::move(*returnValue) : Error::success(); +} + Error ARMAttributeParser::handler(uint64_t tag, bool &handled) { handled = false; for (unsigned AHI = 0, AHE = array_lengthof(displayRoutines); AHI != AHE; diff --git a/llvm/lib/Support/ARMBuildAttrs.cpp b/llvm/lib/Support/ARMBuildAttrs.cpp --- a/llvm/lib/Support/ARMBuildAttrs.cpp +++ b/llvm/lib/Support/ARMBuildAttrs.cpp @@ -75,40 +75,4 @@ constexpr TagNameMap ARMAttributeTags{tagData}; const TagNameMap &llvm::ARMBuildAttrs::getARMAttributeTags() { return ARMAttributeTags; -} - -static std::string getEncodedULEB128AsText(const uint8_t *Value, - unsigned Size) { - std::stringstream SS; - for (unsigned i = 0; i < Size; ++i) { - SS << "\\" << std::setfill('0') << std::setw(3) << std::oct - << int(Value[i]); - } - return SS.str(); -} - -std::string -llvm::ARMBuildAttrs::encodeAttrTagValuePair(StringRef OriginalString) { - auto BytesBegin = reinterpret_cast(OriginalString.data()); - auto BytesEnd = BytesBegin + OriginalString.size(); - - unsigned N = 0; - const char *Error = nullptr; - unsigned Tag = decodeULEB128(BytesBegin, &N, BytesEnd, &Error); - if (Error) - report_fatal_error("Could not decode Tag value: " + Twine(Error)); - - std::string EncodedPair = getEncodedULEB128AsText(BytesBegin, N); - switch (Tag) { - case ARMBuildAttrs::CPU_raw_name: - case ARMBuildAttrs::CPU_name: - case ARMBuildAttrs::compatibility: - case ARMBuildAttrs::conformance: - EncodedPair += OriginalString.substr(N); - break; - default: - EncodedPair += - getEncodedULEB128AsText(BytesBegin + N, OriginalString.size() - N); - } - return EncodedPair; -} +} \ No newline at end of file diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h 
b/llvm/lib/Target/AArch64/AArch64ISelLowering.h --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -450,6 +450,7 @@ STZ2G, LDP, + LDNP, STP, STNP, diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -792,6 +792,17 @@ setOperationAction(ISD::STORE, MVT::v4f64, Custom); setOperationAction(ISD::STORE, MVT::v4i64, Custom); + // 256 bit non-temporal loads can be lowered to LDNP. This is done using + // custom lowering, as there are no un-paired non-temporal loads legalization + // will break up 256 bit inputs. + setOperationAction(ISD::LOAD, MVT::v32i8, Custom); + setOperationAction(ISD::LOAD, MVT::v16i16, Custom); + setOperationAction(ISD::LOAD, MVT::v16f16, Custom); + setOperationAction(ISD::LOAD, MVT::v8i32, Custom); + setOperationAction(ISD::LOAD, MVT::v8f32, Custom); + setOperationAction(ISD::LOAD, MVT::v4f64, Custom); + setOperationAction(ISD::LOAD, MVT::v4i64, Custom); + // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0. // This requires the Performance Monitors extension. if (Subtarget->hasPerfMon()) @@ -2314,6 +2325,7 @@ MAKE_CASE(AArch64ISD::SSTNT1_PRED) MAKE_CASE(AArch64ISD::SSTNT1_INDEX_PRED) MAKE_CASE(AArch64ISD::LDP) + MAKE_CASE(AArch64ISD::LDNP) MAKE_CASE(AArch64ISD::STP) MAKE_CASE(AArch64ISD::STNP) MAKE_CASE(AArch64ISD::BITREVERSE_MERGE_PASSTHRU) @@ -8235,15 +8247,17 @@ Swap = true; } } - // 64-bit check whether we can use CSINC. To avoid signed integer - // overflow the condition ignores wrap around, which is already - // handled by CSINV above. - } else if (1 == - std::max(TrueVal, FalseVal) - std::min(TrueVal, FalseVal)) { - Opcode = AArch64ISD::CSINC; - - if (TrueVal > FalseVal) { - Swap = true; + } else { + // 64-bit check whether we can use CSINC. 
+ const uint64_t TrueVal64 = TrueVal; + const uint64_t FalseVal64 = FalseVal; + + if ((TrueVal64 == FalseVal64 + 1) || (TrueVal64 + 1 == FalseVal64)) { + Opcode = AArch64ISD::CSINC; + + if (TrueVal > FalseVal) { + Swap = true; + } } } @@ -18406,6 +18420,54 @@ BitWidthMinusOne); } +// (CSEL l r cc1 (CMP (CSEL x y cc2 cond) x)) => (CSEL l r cc2 cond) +// (CSEL l r cc1 (CMP (CSEL x y cc2 cond) y)) => (CSEL l r !cc2 cond) +// Where cc1 is any reflexive relation (eg EQ) + +// (CSEL l r cc1 (CMP (CSEL x y cc2 cond) x)) => (CSEL l r !cc2 cond) +// (CSEL l r cc1 (CMP (CSEL x y cc2 cond) y)) => (CSEL l r cc2 cond) +// Where cc1 is any irreflexive relation (eg NE) +static SDValue foldCSELOfCSEL(SDNode *Op, SelectionDAG &DAG) { + SDValue L = Op->getOperand(0); + SDValue R = Op->getOperand(1); + AArch64CC::CondCode OpCC = + static_cast(Op->getConstantOperandVal(2)); + + SDValue OpCmp = Op->getOperand(3); + if (!isCMP(OpCmp)) + return SDValue(); + + SDValue CmpLHS = OpCmp.getOperand(0); + SDValue CmpRHS = OpCmp.getOperand(1); + + if (CmpRHS.getOpcode() == AArch64ISD::CSEL) + std::swap(CmpLHS, CmpRHS); + else if (CmpLHS.getOpcode() != AArch64ISD::CSEL) + return SDValue(); + + SDValue X = CmpLHS->getOperand(0); + SDValue Y = CmpLHS->getOperand(1); + AArch64CC::CondCode CC = + static_cast(CmpLHS->getConstantOperandVal(2)); + SDValue Cond = CmpLHS->getOperand(3); + + if (CmpRHS == Y) + CC = AArch64CC::getInvertedCondCode(CC); + else if (CmpRHS != X) + return SDValue(); + + if (AArch64CC::isIrreflexive(OpCC)) + CC = AArch64CC::getInvertedCondCode(CC); + else if (!AArch64CC::isReflexive(OpCC)) + return SDValue(); + + SDLoc DL(Op); + EVT VT = Op->getValueType(0); + + SDValue CCValue = DAG.getConstant(CC, DL, MVT::i32); + return DAG.getNode(AArch64ISD::CSEL, DL, VT, L, R, CCValue, Cond); +} + // Optimize CSEL instructions static SDValue performCSELCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, @@ -18414,6 +18476,9 @@ if (N->getOperand(0) == N->getOperand(1)) return 
N->getOperand(0); + if (SDValue R = foldCSELOfCSEL(N, DAG)) + return R; + // CSEL 0, cttz(X), eq(X, 0) -> AND cttz bitwidth-1 // CSEL cttz(X), 0, ne(X, 0) -> AND cttz bitwidth-1 if (SDValue Folded = foldCSELofCTTZ(N, DAG)) @@ -20404,9 +20469,29 @@ return; case ISD::ATOMIC_LOAD: case ISD::LOAD: { - assert(SDValue(N, 0).getValueType() == MVT::i128 && - "unexpected load's value type"); MemSDNode *LoadNode = cast(N); + EVT MemVT = LoadNode->getMemoryVT(); + // Handle lowering 256 bit non temporal loads into LDNP. + if (LoadNode->isNonTemporal() && MemVT.getSizeInBits() == 256u && + (MemVT.getScalarSizeInBits() == 8u || + MemVT.getScalarSizeInBits() == 16u || + MemVT.getScalarSizeInBits() == 32u || + MemVT.getScalarSizeInBits() == 64u)) { + + SDValue Result = DAG.getMemIntrinsicNode( + AArch64ISD::LDNP, SDLoc(N), + DAG.getVTList({MemVT.getHalfNumVectorElementsVT(*DAG.getContext()), + MemVT.getHalfNumVectorElementsVT(*DAG.getContext()), + MVT::Other}), + {LoadNode->getChain(), LoadNode->getBasePtr()}, + LoadNode->getMemoryVT(), LoadNode->getMemOperand()); + + SDValue Pair = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), MemVT, + Result.getValue(0), Result.getValue(1)); + Results.append({Pair, Result.getValue(2) /* Chain */}); + return; + } + if ((!LoadNode->isVolatile() && !LoadNode->isAtomic()) || LoadNode->getMemoryVT() != MVT::i128) { // Non-volatile or atomic loads are optimized later in AArch64's load/store @@ -20414,15 +20499,17 @@ return; } - SDValue Result = DAG.getMemIntrinsicNode( - AArch64ISD::LDP, SDLoc(N), - DAG.getVTList({MVT::i64, MVT::i64, MVT::Other}), - {LoadNode->getChain(), LoadNode->getBasePtr()}, LoadNode->getMemoryVT(), - LoadNode->getMemOperand()); - - SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128, - Result.getValue(0), Result.getValue(1)); - Results.append({Pair, Result.getValue(2) /* Chain */}); + if (SDValue(N, 0).getValueType() == MVT::i128) { + SDValue Result = DAG.getMemIntrinsicNode( + AArch64ISD::LDP, SDLoc(N), + 
DAG.getVTList({MVT::i64, MVT::i64, MVT::Other}), + {LoadNode->getChain(), LoadNode->getBasePtr()}, + LoadNode->getMemoryVT(), LoadNode->getMemOperand()); + + SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128, + Result.getValue(0), Result.getValue(1)); + Results.append({Pair, Result.getValue(2) /* Chain */}); + } return; } case ISD::EXTRACT_SUBVECTOR: diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -318,6 +318,7 @@ def SDT_AArch64uaddlp : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>; def SDT_AArch64ldp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; +def SDT_AArch64ldnp : SDTypeProfile<2, 1, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; def SDT_AArch64stp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; def SDT_AArch64stnp : SDTypeProfile<0, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; @@ -728,6 +729,7 @@ def AArch64uunpklo : SDNode<"AArch64ISD::UUNPKLO", SDT_AArch64unpk>; def AArch64ldp : SDNode<"AArch64ISD::LDP", SDT_AArch64ldp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def AArch64ldnp : SDNode<"AArch64ISD::LDNP", SDT_AArch64ldnp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def AArch64stp : SDNode<"AArch64ISD::STP", SDT_AArch64stp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def AArch64stnp : SDNode<"AArch64ISD::STNP", SDT_AArch64stnp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; @@ -2581,6 +2583,8 @@ def : Pat<(AArch64ldp (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)), (LDPXi GPR64sp:$Rn, simm7s8:$offset)>; +def : Pat<(AArch64ldnp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$offset)), + (LDNPQi GPR64sp:$Rn, simm7s16:$offset)>; //--- // (register offset) //--- diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h --- 
a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -331,6 +331,41 @@ case LE: return Z; // Z == 1 || N != V } } + +/// Return true if Code is a reflexive relationship: +/// forall x. (CSET Code (CMP x x)) == 1 +inline static bool isReflexive(CondCode Code) { + switch (Code) { + case EQ: + case HS: + case PL: + case LS: + case GE: + case LE: + case AL: + case NV: + return true; + default: + return false; + } +} + +/// Return true if Code is an irreflexive relationship: +/// forall x. (CSET Code (CMP x x)) == 0 +inline static bool isIrreflexive(CondCode Code) { + switch (Code) { + case NE: + case LO: + case MI: + case HI: + case LT: + case GT: + return true; + default: + return false; + } +} + } // end namespace AArch64CC struct SysAlias { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp @@ -338,9 +338,9 @@ static bool canFitIntoPipeline(SUnit &SU, ScheduleDAGInstrs *DAG, DenseSet &ConflictedInstrs) { - return std::all_of( - ConflictedInstrs.begin(), ConflictedInstrs.end(), - [DAG, &SU](SUnit *SuccSU) { return DAG->canAddEdge(SuccSU, &SU); }); + return llvm::all_of(ConflictedInstrs, [DAG, &SU](SUnit *SuccSU) { + return DAG->canAddEdge(SuccSU, &SU); + }); } void SchedGroup::initSchedGroup(std::vector::reverse_iterator RIter, diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -7679,10 +7679,9 @@ // extend that single value SDValue FirstOp = Op.getOperand(0); if (!isa(FirstOp) && - std::all_of(std::next(Op->op_begin()), Op->op_end(), - [&FirstOp](SDUse &U) { - return U.get().isUndef() || U.get() == FirstOp; - })) { + llvm::all_of(llvm::drop_begin(Op->ops()), [&FirstOp](const SDUse &U) { + return U.get().isUndef() || U.get() == FirstOp; 
+ })) { SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32, FirstOp, DAG.getValueType(MVT::i1)); return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), Ext); diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -204,7 +204,7 @@ default: OS << "\t.eabi_attribute\t" << Attribute << ", \""; if (Attribute == ARMBuildAttrs::also_compatible_with) - OS << ARMBuildAttrs::encodeAttrTagValuePair(String); + OS.write_escaped(String); else OS << String; OS << "\""; diff --git a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp --- a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp @@ -648,45 +648,28 @@ MachineInstr &MI = *MBBI; Register DstLoReg, DstHiReg; Register DstReg = MI.getOperand(0).getReg(); - Register TmpReg = 0; // 0 for no temporary register Register SrcReg = MI.getOperand(1).getReg(); bool SrcIsKill = MI.getOperand(1).isKill(); unsigned OpLo = AVR::LDRdPtr; unsigned OpHi = AVR::LDDRdPtrQ; TRI->splitReg(DstReg, DstLoReg, DstHiReg); - // Use a temporary register if src and dst registers are the same. - if (DstReg == SrcReg) - TmpReg = scavengeGPR8(MI); - - Register CurDstLoReg = (DstReg == SrcReg) ? TmpReg : DstLoReg; - Register CurDstHiReg = (DstReg == SrcReg) ? TmpReg : DstHiReg; + // DstReg has an earlyclobber so the register allocator will allocate them in + // separate registers. + assert(DstReg != SrcReg && "Dst and Src registers are the same!"); // Load low byte. - auto MIBLO = buildMI(MBB, MBBI, OpLo) - .addReg(CurDstLoReg, RegState::Define) - .addReg(SrcReg); - - // Push low byte onto stack if necessary. 
- if (TmpReg) - buildMI(MBB, MBBI, AVR::PUSHRr).addReg(TmpReg); + buildMI(MBB, MBBI, OpLo) + .addReg(DstLoReg, RegState::Define) + .addReg(SrcReg) + .setMemRefs(MI.memoperands()); // Load high byte. - auto MIBHI = buildMI(MBB, MBBI, OpHi) - .addReg(CurDstHiReg, RegState::Define) - .addReg(SrcReg, getKillRegState(SrcIsKill)) - .addImm(1); - - if (TmpReg) { - // Move the high byte into the final destination. - buildMI(MBB, MBBI, AVR::MOVRdRr, DstHiReg).addReg(TmpReg); - - // Move the low byte from the scratch space into the final destination. - buildMI(MBB, MBBI, AVR::POPRd, DstLoReg); - } - - MIBLO.setMemRefs(MI.memoperands()); - MIBHI.setMemRefs(MI.memoperands()); + buildMI(MBB, MBBI, OpHi) + .addReg(DstHiReg, RegState::Define) + .addReg(SrcReg, getKillRegState(SrcIsKill)) + .addImm(1) + .setMemRefs(MI.memoperands()); MI.eraseFromParent(); return true; @@ -763,7 +746,6 @@ MachineInstr &MI = *MBBI; Register DstLoReg, DstHiReg; Register DstReg = MI.getOperand(0).getReg(); - Register TmpReg = 0; // 0 for no temporary register Register SrcReg = MI.getOperand(1).getReg(); unsigned Imm = MI.getOperand(2).getImm(); bool SrcIsKill = MI.getOperand(1).isKill(); @@ -775,39 +757,23 @@ // highest Imm value allowed for the instruction, 62 is the limit here. assert(Imm <= 62 && "Offset is out of range"); - // Use a temporary register if src and dst registers are the same. - if (DstReg == SrcReg) - TmpReg = scavengeGPR8(MI); - - Register CurDstLoReg = (DstReg == SrcReg) ? TmpReg : DstLoReg; - Register CurDstHiReg = (DstReg == SrcReg) ? TmpReg : DstHiReg; + // DstReg has an earlyclobber so the register allocator will allocate them in + // separate registers. + assert(DstReg != SrcReg && "Dst and Src registers are the same!"); // Load low byte. - auto MIBLO = buildMI(MBB, MBBI, OpLo) - .addReg(CurDstLoReg, RegState::Define) - .addReg(SrcReg) - .addImm(Imm); - - // Push low byte onto stack if necessary. 
- if (TmpReg) - buildMI(MBB, MBBI, AVR::PUSHRr).addReg(TmpReg); + buildMI(MBB, MBBI, OpLo) + .addReg(DstLoReg, RegState::Define) + .addReg(SrcReg) + .addImm(Imm) + .setMemRefs(MI.memoperands()); // Load high byte. - auto MIBHI = buildMI(MBB, MBBI, OpHi) - .addReg(CurDstHiReg, RegState::Define) - .addReg(SrcReg, getKillRegState(SrcIsKill)) - .addImm(Imm + 1); - - if (TmpReg) { - // Move the high byte into the final destination. - buildMI(MBB, MBBI, AVR::MOVRdRr, DstHiReg).addReg(TmpReg); - - // Move the low byte from the scratch space into the final destination. - buildMI(MBB, MBBI, AVR::POPRd, DstLoReg); - } - - MIBLO.setMemRefs(MI.memoperands()); - MIBHI.setMemRefs(MI.memoperands()); + buildMI(MBB, MBBI, OpHi) + .addReg(DstHiReg, RegState::Define) + .addReg(SrcReg, getKillRegState(SrcIsKill)) + .addImm(Imm + 1) + .setMemRefs(MI.memoperands()); MI.eraseFromParent(); return true; @@ -1382,8 +1348,8 @@ .addReg(DstReg, getKillRegState(DstIsKill)) .addReg(ZERO_REGISTER); - // SREG is always implicitly killed - MIB->getOperand(2).setIsKill(); + MIB->getOperand(3).setIsDead(); // SREG is always dead + MIB->getOperand(4).setIsKill(); // SREG is always implicitly killed MI.eraseFromParent(); return true; diff --git a/llvm/lib/Target/AVR/AVRFrameLowering.cpp b/llvm/lib/Target/AVR/AVRFrameLowering.cpp --- a/llvm/lib/Target/AVR/AVRFrameLowering.cpp +++ b/llvm/lib/Target/AVR/AVRFrameLowering.cpp @@ -56,6 +56,7 @@ const AVRSubtarget &STI = MF.getSubtarget(); const AVRInstrInfo &TII = *STI.getInstrInfo(); const AVRMachineFunctionInfo *AFI = MF.getInfo(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); bool HasFP = hasFP(MF); // Interrupt handlers re-enable interrupts in function entry. @@ -68,8 +69,8 @@ // Emit special prologue code to save R1, R0 and SREG in interrupt/signal // handlers before saving any other registers. 
if (AFI->isInterruptOrSignalHandler()) { - BuildMI(MBB, MBBI, DL, TII.get(AVR::PUSHWRr)) - .addReg(AVR::R1R0, RegState::Kill) + BuildMI(MBB, MBBI, DL, TII.get(AVR::PUSHRr)) + .addReg(AVR::R0, RegState::Kill) .setMIFlag(MachineInstr::FrameSetup); BuildMI(MBB, MBBI, DL, TII.get(AVR::INRdA), AVR::R0) @@ -78,11 +79,16 @@ BuildMI(MBB, MBBI, DL, TII.get(AVR::PUSHRr)) .addReg(AVR::R0, RegState::Kill) .setMIFlag(MachineInstr::FrameSetup); - BuildMI(MBB, MBBI, DL, TII.get(AVR::EORRdRr)) - .addReg(AVR::R1, RegState::Define) - .addReg(AVR::R1, RegState::Kill) - .addReg(AVR::R1, RegState::Kill) - .setMIFlag(MachineInstr::FrameSetup); + if (!MRI.reg_empty(AVR::R1)) { + BuildMI(MBB, MBBI, DL, TII.get(AVR::PUSHRr)) + .addReg(AVR::R1, RegState::Kill) + .setMIFlag(MachineInstr::FrameSetup); + BuildMI(MBB, MBBI, DL, TII.get(AVR::EORRdRr)) + .addReg(AVR::R1, RegState::Define) + .addReg(AVR::R1, RegState::Kill) + .addReg(AVR::R1, RegState::Kill) + .setMIFlag(MachineInstr::FrameSetup); + } } // Early exit if the frame pointer is not needed in this function. @@ -132,6 +138,7 @@ static void restoreStatusRegister(MachineFunction &MF, MachineBasicBlock &MBB) { const AVRMachineFunctionInfo *AFI = MF.getInfo(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); @@ -142,11 +149,14 @@ // Emit special epilogue code to restore R1, R0 and SREG in interrupt/signal // handlers at the very end of the function, just before reti. 
if (AFI->isInterruptOrSignalHandler()) { + if (!MRI.reg_empty(AVR::R1)) { + BuildMI(MBB, MBBI, DL, TII.get(AVR::POPRd), AVR::R1); + } BuildMI(MBB, MBBI, DL, TII.get(AVR::POPRd), AVR::R0); BuildMI(MBB, MBBI, DL, TII.get(AVR::OUTARr)) .addImm(STI.getIORegSREG()) .addReg(AVR::R0, RegState::Kill); - BuildMI(MBB, MBBI, DL, TII.get(AVR::POPWRd), AVR::R1R0); + BuildMI(MBB, MBBI, DL, TII.get(AVR::POPRd), AVR::R0); } } diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp --- a/llvm/lib/Target/AVR/AVRISelLowering.cpp +++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp @@ -57,6 +57,8 @@ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i8, Expand); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i16, Expand); + setOperationAction(ISD::INLINEASM, MVT::Other, Custom); + for (MVT VT : MVT::integer_valuetypes()) { for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) { setLoadExtAction(N, VT, MVT::i1, Promote); @@ -836,6 +838,52 @@ MachinePointerInfo(SV)); } +// Modify the existing ISD::INLINEASM node to add the implicit register r1. +SDValue AVRTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const { + SDValue R1Reg = DAG.getRegister(AVR::R1, MVT::i8); + if (Op.getOperand(Op.getNumOperands() - 1) == R1Reg || + Op.getOperand(Op.getNumOperands() - 2) == R1Reg) { + // R1 has already been added. Don't add it again. + // If this isn't handled, we get called over and over again. + return Op; + } + + // Get a list of operands to the new INLINEASM node. This is mostly a copy, + // with some edits. + // Add the following operands at the end (but before the glue node, if it's + // there): + // - The flags of the implicit R1 register operand. + // - The implicit R1 register operand itself. 
+ SDLoc dl(Op); + SmallVector Ops; + SDNode *N = Op.getNode(); + SDValue Glue; + for (unsigned I = 0; I < N->getNumOperands(); I++) { + SDValue Operand = N->getOperand(I); + if (Operand.getValueType() == MVT::Glue) { + // The glue operand always needs to be at the end, so we need to treat it + // specially. + Glue = Operand; + } else { + Ops.push_back(Operand); + } + } + unsigned Flags = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, 1); + Ops.push_back(DAG.getTargetConstant(Flags, dl, MVT::i32)); + Ops.push_back(R1Reg); + if (Glue) { + Ops.push_back(Glue); + } + + // Replace the current INLINEASM node with a new one that has R1 as implicit + // parameter. + SDValue New = DAG.getNode(N->getOpcode(), dl, N->getVTList(), Ops); + DAG.ReplaceAllUsesOfValueWith(Op, New); + DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), New.getValue(1)); + + return New; +} + SDValue AVRTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: @@ -861,6 +909,8 @@ case ISD::SDIVREM: case ISD::UDIVREM: return LowerDivRem(Op, DAG); + case ISD::INLINEASM: + return LowerINLINEASM(Op, DAG); } return SDValue(); @@ -1451,6 +1501,10 @@ Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); } + // The R1 register must be passed as an implicit register so that R1 is + // correctly zeroed in interrupts. + Ops.push_back(DAG.getRegister(AVR::R1, MVT::i8)); + // Add a register mask operand representing the call-preserved registers. const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); const uint32_t *Mask = @@ -1572,6 +1626,14 @@ const AVRMachineFunctionInfo *AFI = MF.getInfo(); + if (!AFI->isInterruptOrSignalHandler()) { + // The return instruction has an implicit R1 operand: it must contain zero + // on return. + // This is not needed in interrupts however, where R1 is handled specially + // (only pushed/popped when needed). 
+ RetOps.push_back(DAG.getRegister(AVR::R1, MVT::i8)); + } + unsigned RetOpc = AFI->isInterruptOrSignalHandler() ? AVRISD::RETI_FLAG : AVRISD::RET_FLAG; diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td --- a/llvm/lib/Target/AVR/AVRInstrInfo.td +++ b/llvm/lib/Target/AVR/AVRInstrInfo.td @@ -915,6 +915,7 @@ // neg Rd+1 // neg Rd // sbc Rd+1, r1 + let Uses = [R1] in def NEGWRd : Pseudo<(outs DREGS : $rd), (ins DREGS @@ -1986,6 +1987,7 @@ def ASRWLoRd : Pseudo<(outs DREGS:$rd), (ins DREGS:$src), "asrwlo\t$rd", [(set i16:$rd, (AVRasrlo i16:$src)), (implicit SREG)]>; + let Uses = [R1] in def ROLBRd : Pseudo<(outs GPR8 : $rd), (ins GPR8 diff --git a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp --- a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -1263,13 +1263,8 @@ .addMBB(LoopStart).addImm(CountImm); } - // Make sure the loop start always has a reference in the CFG. We need - // to create a BlockAddress operand to get this mechanism to work both the - // MachineBasicBlock and BasicBlock objects need the flag set. - LoopStart->setHasAddressTaken(); - // This line is needed to set the hasAddressTaken flag on the BasicBlock - // object. - BlockAddress::get(const_cast(LoopStart->getBasicBlock())); + // Make sure the loop start always has a reference in the CFG. + LoopStart->setMachineBlockAddressTaken(); // Replace the loop branch with an endloop instruction. 
DebugLoc LastIDL = LastI->getDebugLoc(); diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td --- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td @@ -143,6 +143,7 @@ def : PatFprFpr; def : PatFprFpr; def : PatFpr; +def : PatFpr; /// Setcc diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td --- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td @@ -149,6 +149,7 @@ def : PatFprFpr; def : PatFprFpr; def : PatFpr; +def : PatFpr; /// Setcc diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/CMakeLists.txt b/llvm/lib/Target/LoongArch/MCTargetDesc/CMakeLists.txt --- a/llvm/lib/Target/LoongArch/MCTargetDesc/CMakeLists.txt +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/CMakeLists.txt @@ -2,11 +2,13 @@ LoongArchAsmBackend.cpp LoongArchBaseInfo.cpp LoongArchELFObjectWriter.cpp + LoongArchELFStreamer.cpp LoongArchInstPrinter.cpp LoongArchMCAsmInfo.cpp LoongArchMCTargetDesc.cpp LoongArchMCCodeEmitter.cpp LoongArchMatInt.cpp + LoongArchTargetStreamer.cpp LINK_COMPONENTS MC diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.h new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.h @@ -0,0 +1,31 @@ +//==-- LoongArchELFStreamer.h - LoongArch ELF Target Streamer --*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHELFSTREAMER_H +#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHELFSTREAMER_H + +#include "LoongArchTargetStreamer.h" +#include "llvm/MC/MCELFStreamer.h" + +namespace llvm { + +class LoongArchTargetELFStreamer : public LoongArchTargetStreamer { +public: + MCELFStreamer &getStreamer(); + LoongArchTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI); + + void finish() override; +}; + +MCELFStreamer *createLoongArchELFStreamer(MCContext &C, + std::unique_ptr MAB, + std::unique_ptr MOW, + std::unique_ptr MCE, + bool RelaxAll); +} // end namespace llvm +#endif diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp @@ -0,0 +1,95 @@ +//===-- LoongArchELFStreamer.cpp - LoongArch ELF Target Streamer Methods --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides LoongArch specific target streamer methods. +// +//===----------------------------------------------------------------------===// + +#include "LoongArchELFStreamer.h" +#include "LoongArchAsmBackend.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCObjectWriter.h" + +using namespace llvm; + +// This part is for ELF object output. 
+LoongArchTargetELFStreamer::LoongArchTargetELFStreamer( + MCStreamer &S, const MCSubtargetInfo &STI) + : LoongArchTargetStreamer(S) { + // FIXME: select appropriate ABI. + setTargetABI(STI.getTargetTriple().isArch64Bit() ? LoongArchABI::ABI_LP64D + : LoongArchABI::ABI_ILP32D); +} + +MCELFStreamer &LoongArchTargetELFStreamer::getStreamer() { + return static_cast(Streamer); +} + +void LoongArchTargetELFStreamer::finish() { + LoongArchTargetStreamer::finish(); + MCAssembler &MCA = getStreamer().getAssembler(); + LoongArchABI::ABI ABI = getTargetABI(); + + // FIXME: + // There are several PRs [1][2][3] that may affect the e_flags. + // After they got closed or merged, we should update the implementation here + // accordingly. + // + // [1] https://github.com/loongson/LoongArch-Documentation/pull/33 + // [2] https://github.com/loongson/LoongArch-Documentation/pull/47 + // [2] https://github.com/loongson/LoongArch-Documentation/pull/61 + unsigned EFlags = MCA.getELFHeaderEFlags(); + switch (ABI) { + case LoongArchABI::ABI_ILP32S: + EFlags |= ELF::EF_LOONGARCH_BASE_ABI_ILP32S; + break; + case LoongArchABI::ABI_ILP32F: + EFlags |= ELF::EF_LOONGARCH_BASE_ABI_ILP32F; + break; + case LoongArchABI::ABI_ILP32D: + EFlags |= ELF::EF_LOONGARCH_BASE_ABI_ILP32D; + break; + case LoongArchABI::ABI_LP64S: + EFlags |= ELF::EF_LOONGARCH_BASE_ABI_LP64S; + break; + case LoongArchABI::ABI_LP64F: + EFlags |= ELF::EF_LOONGARCH_BASE_ABI_LP64F; + break; + case LoongArchABI::ABI_LP64D: + EFlags |= ELF::EF_LOONGARCH_BASE_ABI_LP64D; + break; + case LoongArchABI::ABI_Unknown: + llvm_unreachable("Improperly initialized target ABI"); + } + MCA.setELFHeaderEFlags(EFlags); +} + +namespace { +class LoongArchELFStreamer : public MCELFStreamer { +public: + LoongArchELFStreamer(MCContext &C, std::unique_ptr MAB, + std::unique_ptr MOW, + std::unique_ptr MCE) + : MCELFStreamer(C, std::move(MAB), std::move(MOW), std::move(MCE)) {} +}; +} // end namespace + +namespace llvm { +MCELFStreamer 
*createLoongArchELFStreamer(MCContext &C, + std::unique_ptr MAB, + std::unique_ptr MOW, + std::unique_ptr MCE, + bool RelaxAll) { + LoongArchELFStreamer *S = new LoongArchELFStreamer( + C, std::move(MAB), std::move(MOW), std::move(MCE)); + S->getAssembler().setRelaxAll(RelaxAll); + return S; +} +} // end namespace llvm diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp @@ -12,13 +12,17 @@ #include "LoongArchMCTargetDesc.h" #include "LoongArchBaseInfo.h" +#include "LoongArchELFStreamer.h" #include "LoongArchInstPrinter.h" #include "LoongArchMCAsmInfo.h" #include "TargetInfo/LoongArchTargetInfo.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/TargetRegistry.h" @@ -76,6 +80,13 @@ return new LoongArchInstPrinter(MAI, MII, MRI); } +static MCTargetStreamer * +createLoongArchObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { + return STI.getTargetTriple().isOSBinFormatELF() + ? 
new LoongArchTargetELFStreamer(S, STI) + : nullptr; +} + namespace { class LoongArchMCInstrAnalysis : public MCInstrAnalysis { @@ -101,6 +112,17 @@ return new LoongArchMCInstrAnalysis(Info); } +namespace { +MCStreamer *createLoongArchELFStreamer(const Triple &T, MCContext &Context, + std::unique_ptr &&MAB, + std::unique_ptr &&MOW, + std::unique_ptr &&MCE, + bool RelaxAll) { + return createLoongArchELFStreamer(Context, std::move(MAB), std::move(MOW), + std::move(MCE), RelaxAll); +} +} // end namespace + extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchTargetMC() { for (Target *T : {&getTheLoongArch32Target(), &getTheLoongArch64Target()}) { TargetRegistry::RegisterMCRegInfo(*T, createLoongArchMCRegisterInfo); @@ -111,5 +133,8 @@ TargetRegistry::RegisterMCAsmBackend(*T, createLoongArchAsmBackend); TargetRegistry::RegisterMCInstPrinter(*T, createLoongArchMCInstPrinter); TargetRegistry::RegisterMCInstrAnalysis(*T, createLoongArchInstrAnalysis); + TargetRegistry::RegisterELFStreamer(*T, createLoongArchELFStreamer); + TargetRegistry::RegisterObjectTargetStreamer( + *T, createLoongArchObjectTargetStreamer); } } diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchTargetStreamer.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchTargetStreamer.h new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchTargetStreamer.h @@ -0,0 +1,27 @@ +//===-- LoongArchTargetStreamer.h - LoongArch Target Streamer --*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHTARGETSTREAMER_H +#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHTARGETSTREAMER_H + +#include "LoongArch.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" + +namespace llvm { +class LoongArchTargetStreamer : public MCTargetStreamer { + LoongArchABI::ABI TargetABI = LoongArchABI::ABI_Unknown; + +public: + LoongArchTargetStreamer(MCStreamer &S); + void setTargetABI(LoongArchABI::ABI ABI); + LoongArchABI::ABI getTargetABI() const { return TargetABI; } +}; + +} // end namespace llvm +#endif diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchTargetStreamer.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchTargetStreamer.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchTargetStreamer.cpp @@ -0,0 +1,24 @@ +//===-- LoongArchTargetStreamer.cpp - LoongArch Target Streamer Methods ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides LoongArch specific target streamer methods. 
+// +//===----------------------------------------------------------------------===// + +#include "LoongArchTargetStreamer.h" + +using namespace llvm; + +LoongArchTargetStreamer::LoongArchTargetStreamer(MCStreamer &S) + : MCTargetStreamer(S) {} + +void LoongArchTargetStreamer::setTargetABI(LoongArchABI::ABI ABI) { + assert(ABI != LoongArchABI::ABI_Unknown && + "Improperly initialized target ABI"); + TargetABI = ABI; +} diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -39,8 +39,8 @@ // Do not save RA to the SCS if it's not saved to the regular stack, // i.e. RA is not at risk of being overwritten. std::vector &CSI = MF.getFrameInfo().getCalleeSavedInfo(); - if (std::none_of(CSI.begin(), CSI.end(), - [&](CalleeSavedInfo &CSR) { return CSR.getReg() == RAReg; })) + if (llvm::none_of( + CSI, [&](CalleeSavedInfo &CSR) { return CSR.getReg() == RAReg; })) return; Register SCSPReg = RISCVABI::getSCSPReg(); @@ -89,8 +89,8 @@ // See emitSCSPrologue() above. std::vector &CSI = MF.getFrameInfo().getCalleeSavedInfo(); - if (std::none_of(CSI.begin(), CSI.end(), - [&](CalleeSavedInfo &CSR) { return CSR.getReg() == RAReg; })) + if (llvm::none_of( + CSI, [&](CalleeSavedInfo &CSR) { return CSR.getReg() == RAReg; })) return; Register SCSPReg = RISCVABI::getSCSPReg(); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1104,6 +1104,8 @@ // On RV32, 64-bit integers are split into their high and low parts and held // in two different registers, so the trunc is free since the low register can // just be used. +// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of +// isTruncateFree? 
bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const { if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy()) return false; @@ -1113,8 +1115,10 @@ } bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const { - if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() || - !SrcVT.isInteger() || !DstVT.isInteger()) + // We consider i64->i32 free on RV64 since we have good selection of W + // instructions that make promoting operations back to i64 free in many cases. + if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() || + !DstVT.isInteger()) return false; unsigned SrcBits = SrcVT.getSizeInBits(); unsigned DestBits = DstVT.getSizeInBits(); @@ -8280,23 +8284,25 @@ SDValue N1 = N->getOperand(1); // Prefer to make this 'add 0/1' rather than 'sub 0/1' - // sub constant(!0), 0/1 -> add constant - 1, 1/0 - // NODE: constant == 0, No redundant instructions are generated. + // sub constant, 0/1 -> add constant - 1, 1/0 // (sub constant, (setcc x, y, eq/neq)) -> // (add (setcc x, y, neq/eq), constant - 1) - auto *Nnz0 = dyn_cast(N0); - if (Nnz0 && N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) { - const auto *CC = cast(N1->getOperand(2)); - ISD::CondCode CCVal = CC->get(); - if (!Nnz0->isZero() && isIntEqualitySetCC(CCVal)) { + auto *N0C = dyn_cast(N0); + if (N0C && N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) { + ISD::CondCode CCVal = cast(N1.getOperand(2))->get(); + EVT SetCCOpVT = N1.getOperand(0).getValueType(); + if (SetCCOpVT.isInteger() && isIntEqualitySetCC(CCVal)) { EVT VT = N->getValueType(0); - const APInt &ImmVal = Nnz0->getAPIntValue(); - SDValue CCInverse = - DAG.getCondCode(ISD::getSetCCInverse(CCVal, N0.getValueType())); - SDValue NewN0 = DAG.getNode(ISD::SETCC, SDLoc(N), VT, N1->getOperand(0), - N1->getOperand(1), CCInverse); - SDValue NewN1 = DAG.getConstant(ImmVal - 1, SDLoc(N), VT); - return DAG.getNode(ISD::ADD, SDLoc(N), VT, NewN0, NewN1); + APInt ImmValMinus1 = 
N0C->getAPIntValue() - 1; + // If this doesn't form ADDI, the transform won't save any instructions + // and may increase the number of constants we need. + if (ImmValMinus1.isSignedIntN(12)) { + CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT); + SDValue NewN0 = DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), + N1.getOperand(1), CCVal); + SDValue NewN1 = DAG.getConstant(ImmValMinus1, SDLoc(N), VT); + return DAG.getNode(ISD::ADD, SDLoc(N), VT, NewN0, NewN1); + } } } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td @@ -131,18 +131,13 @@ !cast("WriteVSSSEG" #nf #"e" #eew), !cast("ReadVSTS" #eew #"V"), ReadVSTX, ReadVSTSX, ReadVMask]>; // Indexed Segment Loads and Stores -class VLUXSEGSched : Sched<[ - !cast("WriteVLUXSEG" #nf #"e" #eew), ReadVLDX, ReadVLDUXV, - ReadVMask]>; -class VLOXSEGSched : Sched<[ - !cast("WriteVLOXSEG" #nf #"e" #eew), ReadVLDX, ReadVLDOXV, - ReadVMask]>; -class VSUXSEGSched : Sched<[ - !cast("WriteVSUXSEG" #nf #"e" #eew), - !cast("ReadVSTUX" #eew), ReadVSTX, ReadVSTUXV, ReadVMask]>; -class VSOXSEGSched : Sched<[ - !cast("WriteVSOXSEG" #nf #"e" #eew), - !cast("ReadVSTOX" #eew), ReadVSTX, ReadVSTOXV, ReadVMask]>; +class VLXSEGSched : Sched<[ + !cast("WriteVL" #o # "XSEG" #nf #"e" #eew), ReadVLDX, + !cast("ReadVLD" # o # "XV"), ReadVMask]>; +class VSXSEGSched : Sched<[ + !cast("WriteVS" #o # "XSEG" #nf #"e" #eew), + !cast("ReadVST" #o # "X" #eew), ReadVSTX, + !cast("ReadVST" #o # "XV"), ReadVMask]>; //===----------------------------------------------------------------------===// // Instruction class templates @@ -1543,16 +1538,20 @@ // Vector Indexed Instructions def VLUXSEG#nf#EI#eew#_V : VIndexedSegmentLoad, VLUXSEGSched; + "vluxseg"#nf#"ei"#eew#".v">, + VLXSEGSched; def VLOXSEG#nf#EI#eew#_V : VIndexedSegmentLoad, VLOXSEGSched; + "vloxseg"#nf#"ei"#eew#".v">, + VLXSEGSched; def VSUXSEG#nf#EI#eew#_V : 
VIndexedSegmentStore, VSUXSEGSched; + "vsuxseg"#nf#"ei"#eew#".v">, + VSXSEGSched; def VSOXSEG#nf#EI#eew#_V : VIndexedSegmentStore, VSOXSEGSched; + "vsoxseg"#nf#"ei"#eew#".v">, + VSXSEGSched; } } } // Predicates = [HasVInstructions] @@ -1584,16 +1583,16 @@ // Vector Indexed Segment Instructions def VLUXSEG#nf#EI64_V : VIndexedSegmentLoad, VLUXSEGSched; + "vluxseg"#nf#"ei64.v">, VLXSEGSched; def VLOXSEG#nf#EI64_V : VIndexedSegmentLoad, VLOXSEGSched; + "vloxseg"#nf#"ei64.v">, VLXSEGSched; def VSUXSEG#nf#EI64_V : VIndexedSegmentStore, VSUXSEGSched; + "vsuxseg"#nf#"ei64.v">, VSXSEGSched; def VSOXSEG#nf#EI64_V : VIndexedSegmentStore, VSOXSEGSched; + "vsoxseg"#nf#"ei64.v">, VSXSEGSched; } } // Predicates = [HasVInstructionsI64, IsRV64] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -2837,11 +2837,11 @@ foreach nf = NFSet.L in { defvar vreg = SegRegClass.RC; def nf # "E" # eew # "_V_" # LInfo : - VPseudoUSSegLoadNoMask; + VPseudoUSSegLoadNoMask, VLSEGSched; def nf # "E" # eew # "_V_" # LInfo # "_TU" : - VPseudoUSSegLoadNoMaskTU; + VPseudoUSSegLoadNoMaskTU, VLSEGSched; def nf # "E" # eew # "_V_" # LInfo # "_MASK" : - VPseudoUSSegLoadMask; + VPseudoUSSegLoadMask, VLSEGSched; } } } @@ -2856,11 +2856,11 @@ foreach nf = NFSet.L in { defvar vreg = SegRegClass.RC; def nf # "E" # eew # "FF_V_" # LInfo : - VPseudoUSSegLoadFFNoMask; + VPseudoUSSegLoadFFNoMask, VLSEGFFSched; def nf # "E" # eew # "FF_V_" # LInfo # "_TU" : - VPseudoUSSegLoadFFNoMaskTU; + VPseudoUSSegLoadFFNoMaskTU, VLSEGFFSched; def nf # "E" # eew # "FF_V_" # LInfo # "_MASK" : - VPseudoUSSegLoadFFMask; + VPseudoUSSegLoadFFMask, VLSEGFFSched; } } } @@ -2874,9 +2874,12 @@ let VLMul = lmul.value in { foreach nf = NFSet.L in { defvar vreg = SegRegClass.RC; - def nf # "E" # eew # "_V_" # LInfo : VPseudoSSegLoadNoMask; - def nf # "E" # eew # "_V_" # 
LInfo # "_TU" : VPseudoSSegLoadNoMaskTU; - def nf # "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoSSegLoadMask; + def nf # "E" # eew # "_V_" # LInfo : VPseudoSSegLoadNoMask, + VLSSEGSched; + def nf # "E" # eew # "_V_" # LInfo # "_TU" : VPseudoSSegLoadNoMaskTU, + VLSSEGSched; + def nf # "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoSSegLoadMask, + VLSSEGSched; } } } @@ -2896,18 +2899,22 @@ defvar idx_lmul = !cast("V_" # IdxLInfo); defvar Vreg = val_lmul.vrclass; defvar IdxVreg = idx_lmul.vrclass; + defvar Order = !if(Ordered, "O", "U"); let VLMul = val_lmul.value in { foreach nf = NFSet.L in { defvar ValVreg = SegRegClass.RC; def nf # "EI" # idx_eew # "_V_" # IdxLInfo # "_" # ValLInfo : VPseudoISegLoadNoMask; + nf, Ordered>, + VLXSEGSched; def nf # "EI" # idx_eew # "_V_" # IdxLInfo # "_" # ValLInfo # "_TU" : VPseudoISegLoadNoMaskTU; + nf, Ordered>, + VLXSEGSched; def nf # "EI" # idx_eew # "_V_" # IdxLInfo # "_" # ValLInfo # "_MASK" : VPseudoISegLoadMask; + nf, Ordered>, + VLXSEGSched; } } } @@ -2923,8 +2930,10 @@ let VLMul = lmul.value in { foreach nf = NFSet.L in { defvar vreg = SegRegClass.RC; - def nf # "E" # eew # "_V_" # LInfo : VPseudoUSSegStoreNoMask; - def nf # "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoUSSegStoreMask; + def nf # "E" # eew # "_V_" # LInfo : VPseudoUSSegStoreNoMask, + VSSEGSched; + def nf # "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoUSSegStoreMask, + VSSEGSched; } } } @@ -2938,8 +2947,10 @@ let VLMul = lmul.value in { foreach nf = NFSet.L in { defvar vreg = SegRegClass.RC; - def nf # "E" # eew # "_V_" # LInfo : VPseudoSSegStoreNoMask; - def nf # "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoSSegStoreMask; + def nf # "E" # eew # "_V_" # LInfo : VPseudoSSegStoreNoMask, + VSSSEGSched; + def nf # "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoSSegStoreMask, + VSSSEGSched; } } } @@ -2959,15 +2970,18 @@ defvar idx_lmul = !cast("V_" # IdxLInfo); defvar Vreg = val_lmul.vrclass; defvar IdxVreg = idx_lmul.vrclass; + defvar Order = !if(Ordered, "O", 
"U"); let VLMul = val_lmul.value in { foreach nf = NFSet.L in { defvar ValVreg = SegRegClass.RC; def nf # "EI" # idx_eew # "_V_" # IdxLInfo # "_" # ValLInfo : VPseudoISegStoreNoMask; + nf, Ordered>, + VSXSEGSched; def nf # "EI" # idx_eew # "_V_" # IdxLInfo # "_" # ValLInfo # "_MASK" : VPseudoISegStoreMask; + nf, Ordered>, + VSXSEGSched; } } } diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp --- a/llvm/lib/Target/VE/VEISelLowering.cpp +++ b/llvm/lib/Target/VE/VEISelLowering.cpp @@ -2214,7 +2214,7 @@ MF->insert(I, MainMBB); MF->insert(I, SinkMBB); MF->push_back(RestoreMBB); - RestoreMBB->setHasAddressTaken(); + RestoreMBB->setMachineBlockAddressTaken(); // Transfer the remainder of BB and its successor edges to SinkMBB. SinkMBB->splice(SinkMBB->begin(), MBB, diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -2777,7 +2777,7 @@ // Record that we've taken the address of CatchRetTarget and no longer just // reference it in a terminator. - CatchRetTarget->setHasAddressTaken(); + CatchRetTarget->setMachineBlockAddressTaken(); } bool X86FrameLowering::restoreCalleeSavedRegisters( diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -27038,7 +27038,7 @@ // Avoid false dependency. 
if (PassThru.isUndef()) - PassThru = DAG.getConstant(0, dl, VT); + PassThru = getZeroVector(VT, Subtarget, DAG, dl); return DAG.getNode(IntrData->Opc0, dl, VT, DataToCompress, PassThru, Mask); @@ -29735,8 +29735,22 @@ uint64_t ShiftAmt = APIntShiftAmt.getZExtValue(); - if (supportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode())) + if (supportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode())) { + // Hardware support for vector shifts is sparse which makes us scalarize the + // vector operations in many cases. Also, on sandybridge ADD is faster than + // shl: (shl V, 1) -> (add (freeze V), (freeze V)) + if (Op.getOpcode() == ISD::SHL && ShiftAmt == 1) { + // R may be undef at run-time, but (shl R, 1) must be an even number (LSB + // must be 0). (add undef, undef) however can be any value. To make this + // safe, we must freeze R to ensure that register allocation uses the same + // register for an undefined value. This ensures that the result will + // still be even and preserves the original semantics. + R = DAG.getFreeze(R); + return DAG.getNode(ISD::ADD, dl, VT, R, R); + } + return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG); + } // i64 SRA needs to be performed as partial shifts. if (((!Subtarget.hasXOP() && VT == MVT::v2i64) || @@ -35626,7 +35640,7 @@ MF->insert(I, mainMBB); MF->insert(I, sinkMBB); MF->push_back(restoreMBB); - restoreMBB->setHasAddressTaken(); + restoreMBB->setMachineBlockAddressTaken(); MachineInstrBuilder MIB; @@ -46674,20 +46688,6 @@ } } - // Hardware support for vector shifts is sparse which makes us scalarize the - // vector operations in many cases. Also, on sandybridge ADD is faster than - // shl. - // (shl V, 1) -> add V,V - if (auto *N1BV = dyn_cast(N1)) - if (auto *N1SplatC = N1BV->getConstantSplatNode()) { - assert(N0.getValueType().isVector() && "Invalid vector shift type"); - // We shift all of the values by one. In many cases we do not have - // hardware support for this operation. 
This is better expressed as an ADD - // of two values. - if (N1SplatC->isOne()) - return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0); - } - return SDValue(); } @@ -47269,12 +47269,18 @@ TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { EVT VT = N->getValueType(0); - assert(((N->getOpcode() == X86ISD::PINSRB && VT == MVT::v16i8) || - (N->getOpcode() == X86ISD::PINSRW && VT == MVT::v8i16) || - N->getOpcode() == ISD::INSERT_VECTOR_ELT) && + unsigned Opcode = N->getOpcode(); + assert(((Opcode == X86ISD::PINSRB && VT == MVT::v16i8) || + (Opcode == X86ISD::PINSRW && VT == MVT::v8i16) || + Opcode == ISD::INSERT_VECTOR_ELT) && "Unexpected vector insertion"); - if (N->getOpcode() == X86ISD::PINSRB || N->getOpcode() == X86ISD::PINSRW) { + // Fold insert_vector_elt(undef, elt, 0) --> scalar_to_vector(elt). + if (Opcode == ISD::INSERT_VECTOR_ELT && N->getOperand(0).isUndef() && + isNullConstant(N->getOperand(2))) + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, N->getOperand(1)); + + if (Opcode == X86ISD::PINSRB || Opcode == X86ISD::PINSRW) { unsigned NumBitsPerElt = VT.getScalarSizeInBits(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLI.SimplifyDemandedBits(SDValue(N, 0), diff --git a/llvm/lib/Target/X86/X86IndirectThunks.cpp b/llvm/lib/Target/X86/X86IndirectThunks.cpp --- a/llvm/lib/Target/X86/X86IndirectThunks.cpp +++ b/llvm/lib/Target/X86/X86IndirectThunks.cpp @@ -234,11 +234,11 @@ BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::PAUSE)); BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::LFENCE)); BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::JMP_1)).addMBB(CaptureSpec); - CaptureSpec->setHasAddressTaken(); + CaptureSpec->setMachineBlockAddressTaken(); CaptureSpec->addSuccessor(CaptureSpec); CallTarget->addLiveIn(ThunkReg); - CallTarget->setHasAddressTaken(); + CallTarget->setMachineBlockAddressTaken(); CallTarget->setAlignment(Align(16)); // Insert return address clobber diff --git 
a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp --- a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp +++ b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp @@ -1145,7 +1145,7 @@ // Insert a comparison of the incoming target register with this block's // address. This also requires us to mark the block as having its address // taken explicitly. - MBB.setHasAddressTaken(); + MBB.setMachineBlockAddressTaken(); auto InsertPt = MBB.SkipPHIsLabelsAndDebug(MBB.begin()); if (MF.getTarget().getCodeModel() == CodeModel::Small && !Subtarget->isPositionIndependent()) { diff --git a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp --- a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -546,54 +546,8 @@ writeThinLinkBitcodeToFile(M, *ThinLinkOS, *Index, ModHash); } -class WriteThinLTOBitcode : public ModulePass { - raw_ostream &OS; // raw_ostream to print on - // The output stream on which to emit a minimized module for use - // just in the thin link, if requested. 
- raw_ostream *ThinLinkOS = nullptr; - -public: - static char ID; // Pass identification, replacement for typeid - WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()) { - initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry()); - } - - explicit WriteThinLTOBitcode(raw_ostream &o, raw_ostream *ThinLinkOS) - : ModulePass(ID), OS(o), ThinLinkOS(ThinLinkOS) { - initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry()); - } - - StringRef getPassName() const override { return "ThinLTO Bitcode Writer"; } - - bool runOnModule(Module &M) override { - const ModuleSummaryIndex *Index = - &(getAnalysis().getIndex()); - writeThinLTOBitcode(OS, ThinLinkOS, LegacyAARGetter(*this), M, Index); - return true; - } - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - } -}; } // anonymous namespace -char WriteThinLTOBitcode::ID = 0; -INITIALIZE_PASS_BEGIN(WriteThinLTOBitcode, "write-thinlto-bitcode", - "Write ThinLTO Bitcode", false, true) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(ModuleSummaryIndexWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_END(WriteThinLTOBitcode, "write-thinlto-bitcode", - "Write ThinLTO Bitcode", false, true) - -ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str, - raw_ostream *ThinLinkOS) { - return new WriteThinLTOBitcode(Str, ThinLinkOS); -} - PreservedAnalyses llvm::ThinLTOBitcodeWriterPass::run(Module &M, ModuleAnalysisManager &AM) { FunctionAnalysisManager &FAM = diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -3185,6 +3185,20 @@ if (auto *II = dyn_cast(Cmp.getOperand(0))) if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, II, *C)) return I; + + 
// (extractval ([s/u]subo X, Y), 0) == 0 --> X == Y + // (extractval ([s/u]subo X, Y), 0) != 0 --> X != Y + // TODO: This checks one-use, but that is not strictly necessary. + Value *Cmp0 = Cmp.getOperand(0); + Value *X, *Y; + if (C->isZero() && Cmp.isEquality() && Cmp0->hasOneUse() && + (match(Cmp0, + m_ExtractValue<0>(m_Intrinsic( + m_Value(X), m_Value(Y)))) || + match(Cmp0, + m_ExtractValue<0>(m_Intrinsic( + m_Value(X), m_Value(Y)))))) + return new ICmpInst(Cmp.getPredicate(), X, Y); } if (match(Cmp.getOperand(1), m_APIntAllowUndef(C))) diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp --- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp @@ -120,7 +120,15 @@ case Intrinsic::experimental_constrained_fcmp: case Intrinsic::experimental_constrained_fcmps: { auto *CFP = cast(CI); - return CFP->isDefaultFPEnvironment(); + if (CFP->getExceptionBehavior() && + CFP->getExceptionBehavior() == fp::ebStrict) + return false; + // Since we CSE across function calls we must not allow + // the rounding mode to change. + if (CFP->getRoundingMode() && + CFP->getRoundingMode() == RoundingMode::Dynamic) + return false; + return true; } } } @@ -1374,6 +1382,13 @@ // If this is a simple instruction that we can value number, process it. if (SimpleValue::canHandle(&Inst)) { + if (auto *CI = dyn_cast(&Inst)) { + assert(CI->getExceptionBehavior() != fp::ebStrict && + "Unexpected ebStrict from SimpleValue::canHandle()"); + assert((!CI->getRoundingMode() || + CI->getRoundingMode() != RoundingMode::Dynamic) && + "Unexpected dynamic rounding from SimpleValue::canHandle()"); + } // See if the instruction has an available value. If so, use it. 
if (Value *V = AvailableValues.lookup(&Inst)) { LLVM_DEBUG(dbgs() << "EarlyCSE CSE: " << Inst << " to: " << *V diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp --- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -2082,8 +2082,12 @@ auto InsertClobbersAt = [&](Instruction *IP) { for (auto *AI : ToClobber) { - auto PT = cast(AI->getAllocatedType()); - Constant *CPN = ConstantPointerNull::get(PT); + auto AT = AI->getAllocatedType(); + Constant *CPN; + if (AT->isVectorTy()) + CPN = ConstantAggregateZero::get(AT); + else + CPN = ConstantPointerNull::get(cast(AT)); new StoreInst(CPN, AI, IP); } }; diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp --- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -1756,22 +1756,19 @@ return emitBinaryFloatFnCallHelper(Op1, Op2, TheLibFunc, Name, B, Attrs, TLI); } +// Emit a call to putchar(int) with Char as the argument. Char must have +// the same precision as int, which need not be 32 bits. 
Value *llvm::emitPutChar(Value *Char, IRBuilderBase &B, const TargetLibraryInfo *TLI) { Module *M = B.GetInsertBlock()->getModule(); if (!isLibFuncEmittable(M, TLI, LibFunc_putchar)) return nullptr; + Type *Ty = Char->getType(); StringRef PutCharName = TLI->getName(LibFunc_putchar); - FunctionCallee PutChar = getOrInsertLibFunc(M, *TLI, LibFunc_putchar, - B.getInt32Ty(), B.getInt32Ty()); + FunctionCallee PutChar = getOrInsertLibFunc(M, *TLI, LibFunc_putchar, Ty, Ty); inferNonMandatoryLibFuncAttrs(M, PutCharName, *TLI); - CallInst *CI = B.CreateCall(PutChar, - B.CreateIntCast(Char, - B.getInt32Ty(), - /*isSigned*/true, - "chari"), - PutCharName); + CallInst *CI = B.CreateCall(PutChar, Char, PutCharName); if (const Function *F = dyn_cast(PutChar.getCallee()->stripPointerCasts())) diff --git a/llvm/lib/Transforms/Utils/FlattenCFG.cpp b/llvm/lib/Transforms/Utils/FlattenCFG.cpp --- a/llvm/lib/Transforms/Utils/FlattenCFG.cpp +++ b/llvm/lib/Transforms/Utils/FlattenCFG.cpp @@ -431,6 +431,9 @@ return false; BasicBlock *FirstEntryBlock = CInst1->getParent(); + // Don't die trying to process degenerate/unreachable code. + if (FirstEntryBlock == SecondEntryBlock) + return false; // Either then-path or else-path should be empty. bool InvertCond2 = false; diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -473,7 +473,8 @@ static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) { // Normal constant int. ConstantInt *CI = dyn_cast(V); - if (CI || !isa(V) || !V->getType()->isPointerTy()) + if (CI || !isa(V) || !V->getType()->isPointerTy() || + DL.isNonIntegralPointerType(V->getType())) return CI; // This is some kind of pointer constant. 
Turn it into a pointer-sized diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -692,6 +692,89 @@ return DstEnd; } +// Optimize a call to size_t strlcpy(char*, const char*, size_t). + +Value *LibCallSimplifier::optimizeStrLCpy(CallInst *CI, IRBuilderBase &B) { + Value *Size = CI->getArgOperand(2); + if (isKnownNonZero(Size, DL)) + // Like snprintf, the function stores into the destination only when + // the size argument is nonzero. + annotateNonNullNoUndefBasedOnAccess(CI, 0); + // The function reads the source argument regardless of Size (it returns + // its length). + annotateNonNullNoUndefBasedOnAccess(CI, 1); + + uint64_t NBytes; + if (ConstantInt *SizeC = dyn_cast(Size)) + NBytes = SizeC->getZExtValue(); + else + return nullptr; + + Value *Dst = CI->getArgOperand(0); + Value *Src = CI->getArgOperand(1); + if (NBytes <= 1) { + if (NBytes == 1) + // For a call to strlcpy(D, S, 1) first store a nul in *D. + B.CreateStore(B.getInt8(0), Dst); + + // Transform strlcpy(D, S, 0) to a call to strlen(S). + return copyFlags(*CI, emitStrLen(Src, B, DL, TLI)); + } + + // Try to determine the length of the source, substituting its size + // when it's not nul-terminated (as it's required to be) to avoid + // reading past its end. + StringRef Str; + if (!getConstantStringInfo(Src, Str, 0, /*TrimAtNul=*/false)) + return nullptr; + + uint64_t SrcLen = Str.find('\0'); + // Set if the terminating nul should be copied by the call to memcpy + // below. + bool NulTerm = SrcLen < NBytes; + + if (NulTerm) + // Overwrite NBytes with the number of bytes to copy, including + // the terminating nul. + NBytes = SrcLen + 1; + else { + // Set the length of the source for the function to return to its + // size, and cap NBytes at the same. 
+ SrcLen = std::min(SrcLen, uint64_t(Str.size())); + NBytes = std::min(NBytes - 1, SrcLen); + } + + if (SrcLen == 0) { + // Transform strlcpy(D, "", N) to (*D = '\0', 0). + B.CreateStore(B.getInt8(0), Dst); + return ConstantInt::get(CI->getType(), 0); + } + + Function *Callee = CI->getCalledFunction(); + Type *PT = Callee->getFunctionType()->getParamType(0); + // Transform strlcpy(D, S, N) to memcpy(D, S, N') where N' is the lower + // bound on strlen(S) + 1 and N, optionally followed by a nul store to + // D[N' - 1] if necessary. + CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1), + ConstantInt::get(DL.getIntPtrType(PT), NBytes)); + NewCI->setAttributes(CI->getAttributes()); + NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType())); + copyFlags(*CI, NewCI); + + if (!NulTerm) { + Value *EndOff = ConstantInt::get(CI->getType(), NBytes); + Value *EndPtr = B.CreateInBoundsGEP(B.getInt8Ty(), Dst, EndOff); + B.CreateStore(B.getInt8(0), EndPtr); + } + + // Like snprintf, strlcpy returns the number of nonzero bytes that would + // have been copied if the bound had been sufficiently big (which in this + // case is strlen(Src)). + return ConstantInt::get(CI->getType(), SrcLen); +} + +// Optimize a call to strncpy. + Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilderBase &B) { Function *Callee = CI->getCalledFunction(); Value *Dst = CI->getArgOperand(0); @@ -2550,21 +2633,24 @@ //===----------------------------------------------------------------------===// Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilderBase &B) { - // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0 + // All variants of ffs return int which need not be 32 bits wide. + // ffs{,l,ll}(x) -> x != 0 ? 
(int)llvm.cttz(x)+1 : 0 + Type *RetType = CI->getType(); Value *Op = CI->getArgOperand(0); Type *ArgType = Op->getType(); Function *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(), Intrinsic::cttz, ArgType); Value *V = B.CreateCall(F, {Op, B.getTrue()}, "cttz"); V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1)); - V = B.CreateIntCast(V, B.getInt32Ty(), false); + V = B.CreateIntCast(V, RetType, false); Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType)); - return B.CreateSelect(Cond, V, B.getInt32(0)); + return B.CreateSelect(Cond, V, ConstantInt::get(RetType, 0)); } Value *LibCallSimplifier::optimizeFls(CallInst *CI, IRBuilderBase &B) { - // fls(x) -> (i32)(sizeInBits(x) - llvm.ctlz(x, false)) + // All variants of fls return int which need not be 32 bits wide. + // fls{,l,ll}(x) -> (int)(sizeInBits(x) - llvm.ctlz(x, false)) Value *Op = CI->getArgOperand(0); Type *ArgType = Op->getType(); Function *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(), @@ -2587,15 +2673,17 @@ Value *LibCallSimplifier::optimizeIsDigit(CallInst *CI, IRBuilderBase &B) { // isdigit(c) -> (c-'0') getArgOperand(0); - Op = B.CreateSub(Op, B.getInt32('0'), "isdigittmp"); - Op = B.CreateICmpULT(Op, B.getInt32(10), "isdigit"); + Type *ArgType = Op->getType(); + Op = B.CreateSub(Op, ConstantInt::get(ArgType, '0'), "isdigittmp"); + Op = B.CreateICmpULT(Op, ConstantInt::get(ArgType, 10), "isdigit"); return B.CreateZExt(Op, CI->getType()); } Value *LibCallSimplifier::optimizeIsAscii(CallInst *CI, IRBuilderBase &B) { // isascii(c) -> c getArgOperand(0); - Op = B.CreateICmpULT(Op, B.getInt32(128), "isascii"); + Type *ArgType = Op->getType(); + Op = B.CreateICmpULT(Op, ConstantInt::get(ArgType, 128), "isascii"); return B.CreateZExt(Op, CI->getType()); } @@ -2701,9 +2789,15 @@ if (!CI->use_empty()) return nullptr; + Type *IntTy = CI->getType(); // printf("x") -> putchar('x'), even for "%" and "%%". 
- if (FormatStr.size() == 1 || FormatStr == "%%") - return copyFlags(*CI, emitPutChar(B.getInt32(FormatStr[0]), B, TLI)); + if (FormatStr.size() == 1 || FormatStr == "%%") { + // Convert the character to unsigned char before passing it to putchar + // to avoid host-specific sign extension in the IR. Putchar converts + // it to unsigned char regardless. + Value *IntChar = ConstantInt::get(IntTy, (unsigned char)FormatStr[0]); + return copyFlags(*CI, emitPutChar(IntChar, B, TLI)); + } // Try to remove call or emit putchar/puts. if (FormatStr == "%s" && CI->arg_size() > 1) { @@ -2714,8 +2808,13 @@ if (OperandStr.empty()) return (Value *)CI; // printf("%s", "a") --> putchar('a') - if (OperandStr.size() == 1) - return copyFlags(*CI, emitPutChar(B.getInt32(OperandStr[0]), B, TLI)); + if (OperandStr.size() == 1) { + // Convert the character to unsigned char before passing it to putchar + // to avoid host-specific sign extension in the IR. Putchar converts + // it to unsigned char regardless. + Value *IntChar = ConstantInt::get(IntTy, (unsigned char)OperandStr[0]); + return copyFlags(*CI, emitPutChar(IntChar, B, TLI)); + } // printf("%s", str"\n") --> puts(str) if (OperandStr.back() == '\n') { OperandStr = OperandStr.drop_back(); @@ -2738,8 +2837,12 @@ // Optimize specific format strings. // printf("%c", chr) --> putchar(chr) if (FormatStr == "%c" && CI->arg_size() > 1 && - CI->getArgOperand(1)->getType()->isIntegerTy()) - return copyFlags(*CI, emitPutChar(CI->getArgOperand(1), B, TLI)); + CI->getArgOperand(1)->getType()->isIntegerTy()) { + // Convert the argument to the type expected by putchar, i.e., int, which + // need not be 32 bits wide but which is the same as printf's return type. 
+ Value *IntChar = B.CreateIntCast(CI->getArgOperand(1), IntTy, false); + return copyFlags(*CI, emitPutChar(IntChar, B, TLI)); + } // printf("%s\n", str) --> puts(str) if (FormatStr == "%s\n" && CI->arg_size() > 1 && @@ -2908,6 +3011,60 @@ return nullptr; } +// Transform an snprintf call CI with the bound N to format the string Str +// either to a call to memcpy, or to a single character store, or to nothing, +// and fold the result to a constant. A nonnull StrArg refers to the string +// argument being formatted. Otherwise the call is one with N < 2 and +// the "%c" directive to format a single character. +Value *LibCallSimplifier::emitSnPrintfMemCpy(CallInst *CI, Value *StrArg, + StringRef Str, uint64_t N, + IRBuilderBase &B) { + assert(StrArg || (N < 2 && Str.size() == 1)); + + unsigned IntBits = TLI->getIntSize(); + uint64_t IntMax = maxIntN(IntBits); + if (Str.size() > IntMax) + // Bail if the string is longer than INT_MAX. POSIX requires + // implementations to set errno to EOVERFLOW in this case, in + // addition to when N is larger than that (checked by the caller). + return nullptr; + + Value *StrLen = ConstantInt::get(CI->getType(), Str.size()); + if (N == 0) + return StrLen; + + // Set to the number of bytes to copy from StrArg which is also + // the offset of the terminating nul. + uint64_t NCopy; + if (N > Str.size()) + // Copy the full string, including the terminating nul (which must + // be present regardless of the bound). + NCopy = Str.size() + 1; + else + NCopy = N - 1; + + Value *DstArg = CI->getArgOperand(0); + if (NCopy && StrArg) + // Transform the call to llvm.memcpy(dst, fmt, N). + copyFlags( + *CI, + B.CreateMemCpy( + DstArg, Align(1), StrArg, Align(1), + ConstantInt::get(DL.getIntPtrType(CI->getContext()), NCopy))); + + if (N > Str.size()) + // Return early when the whole format string, including the final nul, + // has been copied. + return StrLen; + + // Otherwise, when truncating the string append a terminating nul. 
+ Type *Int8Ty = B.getInt8Ty(); + Value *NulOff = B.getIntN(IntBits, NCopy); + Value *DstEnd = B.CreateInBoundsGEP(Int8Ty, DstArg, NulOff, "endptr"); + B.CreateStore(ConstantInt::get(Int8Ty, 0), DstEnd); + return StrLen; +} + Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI, IRBuilderBase &B) { // Check for size @@ -2916,78 +3073,66 @@ return nullptr; uint64_t N = Size->getZExtValue(); + uint64_t IntMax = maxIntN(TLI->getIntSize()); + if (N > IntMax) + // Bail if the bound exceeds INT_MAX. POSIX requires implementations + // to set errno to EOVERFLOW in this case. + return nullptr; + + Value *DstArg = CI->getArgOperand(0); + Value *FmtArg = CI->getArgOperand(2); + // Check for a fixed format string. StringRef FormatStr; - if (!getConstantStringInfo(CI->getArgOperand(2), FormatStr)) + if (!getConstantStringInfo(FmtArg, FormatStr)) return nullptr; // If we just have a format string (nothing else crazy) transform it. if (CI->arg_size() == 3) { - // Make sure there's no % in the constant array. We could try to handle - // %% -> % in the future if we cared. if (FormatStr.contains('%')) - return nullptr; // we found a format specifier, bail out. - - if (N == 0) - return ConstantInt::get(CI->getType(), FormatStr.size()); - else if (N < FormatStr.size() + 1) + // Bail if the format string contains a directive and there are + // no arguments. We could handle "%%" in the future. return nullptr; - // snprintf(dst, size, fmt) -> llvm.memcpy(align 1 dst, align 1 fmt, - // strlen(fmt)+1) - copyFlags( - *CI, - B.CreateMemCpy( - CI->getArgOperand(0), Align(1), CI->getArgOperand(2), Align(1), - ConstantInt::get(DL.getIntPtrType(CI->getContext()), - FormatStr.size() + 1))); // Copy the null byte. - return ConstantInt::get(CI->getType(), FormatStr.size()); + return emitSnPrintfMemCpy(CI, FmtArg, FormatStr, N, B); } // The remaining optimizations require the format string to be "%s" or "%c" // and have an extra operand. 
- if (FormatStr.size() == 2 && FormatStr[0] == '%' && CI->arg_size() == 4) { - - // Decode the second character of the format string. - if (FormatStr[1] == 'c') { - if (N == 0) - return ConstantInt::get(CI->getType(), 1); - else if (N == 1) - return nullptr; - - // snprintf(dst, size, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0 - if (!CI->getArgOperand(3)->getType()->isIntegerTy()) - return nullptr; - Value *V = B.CreateTrunc(CI->getArgOperand(3), B.getInt8Ty(), "char"); - Value *Ptr = castToCStr(CI->getArgOperand(0), B); - B.CreateStore(V, Ptr); - Ptr = B.CreateInBoundsGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul"); - B.CreateStore(B.getInt8(0), Ptr); + if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->arg_size() != 4) + return nullptr; - return ConstantInt::get(CI->getType(), 1); + // Decode the second character of the format string. + if (FormatStr[1] == 'c') { + if (N <= 1) { + // Use an arbitrary string of length 1 to transform the call into + // either a nul store (N == 1) or a no-op (N == 0) and fold it + // to one. 
+ StringRef CharStr("*"); + return emitSnPrintfMemCpy(CI, nullptr, CharStr, N, B); } - if (FormatStr[1] == 's') { - // snprintf(dest, size, "%s", str) to llvm.memcpy(dest, str, len+1, 1) - StringRef Str; - if (!getConstantStringInfo(CI->getArgOperand(3), Str)) - return nullptr; + // snprintf(dst, size, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0 + if (!CI->getArgOperand(3)->getType()->isIntegerTy()) + return nullptr; + Value *V = B.CreateTrunc(CI->getArgOperand(3), B.getInt8Ty(), "char"); + Value *Ptr = castToCStr(DstArg, B); + B.CreateStore(V, Ptr); + Ptr = B.CreateInBoundsGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul"); + B.CreateStore(B.getInt8(0), Ptr); + return ConstantInt::get(CI->getType(), 1); + } - if (N == 0) - return ConstantInt::get(CI->getType(), Str.size()); - else if (N < Str.size() + 1) - return nullptr; + if (FormatStr[1] != 's') + return nullptr; - copyFlags( - *CI, B.CreateMemCpy(CI->getArgOperand(0), Align(1), - CI->getArgOperand(3), Align(1), - ConstantInt::get(CI->getType(), Str.size() + 1))); + Value *StrArg = CI->getArgOperand(3); + // snprintf(dest, size, "%s", str) to llvm.memcpy(dest, str, len+1, 1) + StringRef Str; + if (!getConstantStringInfo(StrArg, Str)) + return nullptr; - // The snprintf result is the unincremented number of bytes in the string. - return ConstantInt::get(CI->getType(), Str.size()); - } - } - return nullptr; + return emitSnPrintfMemCpy(CI, StrArg, Str, N, B); } Value *LibCallSimplifier::optimizeSnPrintF(CallInst *CI, IRBuilderBase &B) { @@ -3150,8 +3295,12 @@ // Check for a constant string. // puts("") -> putchar('\n') StringRef Str; - if (getConstantStringInfo(CI->getArgOperand(0), Str) && Str.empty()) - return copyFlags(*CI, emitPutChar(B.getInt32('\n'), B, TLI)); + if (getConstantStringInfo(CI->getArgOperand(0), Str) && Str.empty()) { + // putchar takes an argument of the same type as puts returns, i.e., + // int, which need not be 32 bits wide. 
+ Type *IntTy = CI->getType(); + return copyFlags(*CI, emitPutChar(ConstantInt::get(IntTy, '\n'), B, TLI)); + } return nullptr; } @@ -3198,6 +3347,8 @@ return optimizeStrCpy(CI, Builder); case LibFunc_stpcpy: return optimizeStpCpy(CI, Builder); + case LibFunc_strlcpy: + return optimizeStrLCpy(CI, Builder); case LibFunc_strncpy: return optimizeStrNCpy(CI, Builder); case LibFunc_strlen: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -205,7 +205,7 @@ /// \returns True if the value is a constant (but not globals/constant /// expressions). static bool isConstant(Value *V) { - return isa(V) && !isa(V) && !isa(V); + return isa(V) && !isa(V); } /// Checks if \p V is one of vector-like instructions, i.e. undef, @@ -2994,7 +2994,7 @@ // okay. auto *In = BundleMember->Inst; assert(In && - (isa(In) || isa(In) || + (isa(In) || In->getNumOperands() == TE->getNumOperands()) && "Missed TreeEntry operands?"); (void)In; // fake use to avoid build failure when assertions disabled @@ -4489,7 +4489,7 @@ } else if (auto *I = dyn_cast(V)) { // Sort other instructions just by the opcodes except for CMPInst. // For CMP also sort by the predicate kind. - if ((isa(I) || isa(I)) && + if ((isa(I)) && isValidForAlternation(I->getOpcode())) { if (AllowAlternate) Key = hash_value(isa(I) ? 1 : 0); @@ -5536,8 +5536,7 @@ unsigned N = 1; Type *EltTy = T; - while (isa(EltTy) || isa(EltTy) || - isa(EltTy)) { + while (isa(EltTy)) { if (auto *ST = dyn_cast(EltTy)) { // Check that struct is homogeneous. for (const auto *Ty : ST->elements()) @@ -5867,9 +5866,9 @@ // Take credit for instruction that will become dead. 
if (EE->hasOneUse()) { Instruction *Ext = EE->user_back(); - if ((isa(Ext) || isa(Ext)) && - all_of(Ext->users(), - [](User *U) { return isa(U); })) { + if (isa(Ext) && all_of(Ext->users(), [](User *U) { + return isa(U); + })) { // Use getExtractWithExtendCost() to calculate the cost of // extractelement/ext pair. Cost -= @@ -6142,18 +6141,18 @@ // Take credit for instruction that will become dead. if (EI->hasOneUse()) { Instruction *Ext = EI->user_back(); - if ((isa(Ext) || isa(Ext)) && + if (isa(Ext) && all_of(Ext->users(), [](User *U) { return isa(U); })) { - // Use getExtractWithExtendCost() to calculate the cost of - // extractelement/ext pair. - CommonCost -= TTI->getExtractWithExtendCost( - Ext->getOpcode(), Ext->getType(), VecTy, I); - // Add back the cost of s|zext which is subtracted separately. - CommonCost += TTI->getCastInstrCost( - Ext->getOpcode(), Ext->getType(), EI->getType(), - TTI::getCastContextHint(Ext), CostKind, Ext); - continue; + // Use getExtractWithExtendCost() to calculate the cost of + // extractelement/ext pair. + CommonCost -= TTI->getExtractWithExtendCost( + Ext->getOpcode(), Ext->getType(), VecTy, I); + // Add back the cost of s|zext which is subtracted separately. + CommonCost += TTI->getCastInstrCost( + Ext->getOpcode(), Ext->getType(), EI->getType(), + TTI::getCastContextHint(Ext), CostKind, Ext); + continue; } } CommonCost -= @@ -9001,8 +9000,8 @@ for (Instruction &In : llvm::make_early_inc_range(*BB)) { if (isDeleted(&In)) continue; - if (!isa(&In) && !isa(&In) && - !isa(&In) && !GatherShuffleSeq.contains(&In)) + if (!isa(&In) && + !GatherShuffleSeq.contains(&In)) continue; // Check if we can replace this instruction with any of the @@ -9660,17 +9659,15 @@ // If the current instruction is a load, update MaxWidth to reflect the // width of the loaded value. - if (isa(I) || isa(I) || - isa(I)) + if (isa(I)) Width = std::max(Width, DL->getTypeSizeInBits(Ty)); // Otherwise, we need to visit the operands of the instruction. 
We only // handle the interesting cases from buildTree here. If an operand is an // instruction we haven't yet visited and from the same basic block as the // user or the use is a PHI node, we add it to the worklist. - else if (isa(I) || isa(I) || isa(I) || - isa(I) || isa(I) || isa(I) || - isa(I)) { + else if (isa(I)) { for (Use &U : I->operands()) if (auto *J = dyn_cast(U.get())) if (Visited.insert(J).second && @@ -9723,8 +9720,7 @@ break; case Instruction::ZExt: case Instruction::SExt: - if (isa(I->getOperand(0)) || - isa(I->getOperand(0))) + if (isa(I->getOperand(0))) return false; break; @@ -10083,7 +10079,7 @@ InstructionCost Cost = R.getTreeCost(); - LLVM_DEBUG(dbgs() << "SLP: Found cost = " << Cost << " for VF =" << VF << "\n"); + LLVM_DEBUG(dbgs() << "SLP: Found cost = " << Cost << " for VF=" << VF << "\n"); if (Cost < -SLPCostThreshold) { LLVM_DEBUG(dbgs() << "SLP: Decided to vectorize cost = " << Cost << "\n"); @@ -10384,6 +10380,7 @@ CandidateFound = true; MinCost = std::min(MinCost, Cost); + LLVM_DEBUG(dbgs() << "SLP: Found cost = " << Cost << " for VF=" << VF << "\n"); if (Cost < -SLPCostThreshold) { LLVM_DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n"); R.getORE()->emit(OptimizationRemark(SV_NAME, "VectorizedList", @@ -10422,8 +10419,7 @@ if (!I) return false; - if ((!isa(I) && !isa(I)) || - isa(I->getType())) + if (!isa(I) || isa(I->getType())) return false; Value *P = I->getParent(); @@ -11224,8 +11220,8 @@ InstructionCost ReductionCost = getReductionCost(TTI, VL, ReduxWidth, RdxFMF); InstructionCost Cost = TreeCost + ReductionCost; + LLVM_DEBUG(dbgs() << "SLP: Found cost = " << Cost << " for reduction\n"); if (!Cost.isValid()) { - LLVM_DEBUG(dbgs() << "Encountered invalid baseline cost.\n"); return nullptr; } if (Cost >= -SLPCostThreshold) { @@ -11533,8 +11529,7 @@ getInsertIndex(LastInsertInst, OperandOffset); if (!OperandIndex) return; - if (isa(InsertedOperand) || - isa(InsertedOperand)) { + if (isa(InsertedOperand)) { 
findBuildAggregate_rec(cast(InsertedOperand), TTI, BuildVectorOpds, InsertElts, *OperandIndex); @@ -11544,8 +11539,7 @@ } LastInsertInst = dyn_cast(LastInsertInst->getOperand(0)); } while (LastInsertInst != nullptr && - (isa(LastInsertInst) || - isa(LastInsertInst)) && + isa(LastInsertInst) && LastInsertInst->hasOneUse(); } @@ -12240,11 +12234,24 @@ // Ran into an instruction without users, like terminator, or function call // with ignored return value, store. Ignore unused instructions (basing on // instruction type, except for CallInst and InvokeInst). - if (it->use_empty() && (it->getType()->isVoidTy() || isa(it) || - isa(it))) { + if (it->use_empty() && + (it->getType()->isVoidTy() || isa(it))) { KeyNodes.insert(&*it); bool OpsChanged = false; - if (ShouldStartVectorizeHorAtStore || !isa(it)) { + auto *SI = dyn_cast(it); + bool TryToVectorizeRoot = ShouldStartVectorizeHorAtStore || !SI; + if (SI) { + auto I = Stores.find(getUnderlyingObject(SI->getPointerOperand())); + // Try to vectorize chain in store, if this is the only store to the + // address in the block. + // TODO: This is just a temporary solution to save compile time. Need + // to investigate if we can safely turn on slp-vectorize-hor-store + // instead to allow lookup for reduction chains in all non-vectorized + // stores (need to check side effects and compile time). + TryToVectorizeRoot = (I == Stores.end() || I->second.size() == 1) && + SI->getValueOperand()->hasOneUse(); + } + if (TryToVectorizeRoot) { for (auto *V : it->operand_values()) { // Try to match and vectorize a horizontal reduction. 
OpsChanged |= vectorizeRootInstruction(nullptr, V, BB, R, TTI); @@ -12265,8 +12272,7 @@ } } - if (isa(it) || isa(it) || - isa(it)) + if (isa(it)) PostProcessInstructions.push_back(&*it); } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll @@ -137,13 +137,13 @@ ; CHECK-NEXT: successors: %[[BB_L1:bb.[0-9]+]](0x80000000) ; ; Check basic block L1 has 2 successors: BBL1 and BBL2 -; CHECK: [[BB_L1]].{{[a-zA-Z0-9.]+}} (address-taken): +; CHECK: [[BB_L1]].{{[a-zA-Z0-9.]+}} (ir-block-address-taken %ir-block.{{[a-zA-Z0-9.]+}}): ; CHECK-NEXT: successors: %[[BB_L1]](0x40000000), ; CHECK: %[[BB_L2:bb.[0-9]+]](0x40000000) ; CHECK: G_BRINDIRECT %{{[0-9]+}}(p0) ; ; Check basic block L2 is the return basic block -; CHECK: [[BB_L2]].{{[a-zA-Z0-9.]+}} (address-taken): +; CHECK: [[BB_L2]].{{[a-zA-Z0-9.]+}} (ir-block-address-taken %ir-block.{{[a-zA-Z0-9.]+}}): ; CHECK-NEXT: RET_ReallyLR @indirectbr.L = internal unnamed_addr constant [3 x i8*] [i8* blockaddress(@indirectbr, %L1), i8* blockaddress(@indirectbr, %L2), i8* null], align 8 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-blockaddress.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-blockaddress.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-blockaddress.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-blockaddress.mir @@ -25,20 +25,21 @@ body: | ; CHECK-LABEL: name: test_blockaddress ; CHECK: bb.0 (%ir-block.0): - ; CHECK: [[BLOCK_ADDR:%[0-9]+]]:_(p0) = G_BLOCK_ADDR blockaddress(@test_blockaddress, %ir-block.block) - ; CHECK: [[ADRP:%[0-9]+]]:gpr64(p0) = ADRP target-flags(aarch64-page) @addr - ; CHECK: [[ADD_LOW:%[0-9]+]]:_(p0) = G_ADD_LOW [[ADRP]](p0), target-flags(aarch64-pageoff, aarch64-nc) @addr - ; CHECK: G_STORE [[BLOCK_ADDR]](p0), [[ADD_LOW]](p0) :: (store (p0) into @addr) - ; 
CHECK: G_BRINDIRECT [[BLOCK_ADDR]](p0) - ; CHECK: bb.1.block (address-taken): - ; CHECK: RET_ReallyLR + ; CHECK-NEXT: [[BLOCK_ADDR:%[0-9]+]]:_(p0) = G_BLOCK_ADDR blockaddress(@test_blockaddress, %ir-block.block) + ; CHECK-NEXT: [[ADRP:%[0-9]+]]:gpr64(p0) = ADRP target-flags(aarch64-page) @addr + ; CHECK-NEXT: [[ADD_LOW:%[0-9]+]]:_(p0) = G_ADD_LOW [[ADRP]](p0), target-flags(aarch64-pageoff, aarch64-nc) @addr + ; CHECK-NEXT: G_STORE [[BLOCK_ADDR]](p0), [[ADD_LOW]](p0) :: (store (p0) into @addr) + ; CHECK-NEXT: G_BRINDIRECT [[BLOCK_ADDR]](p0) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.block (ir-block-address-taken %ir-block.block): + ; CHECK-NEXT: RET_ReallyLR bb.1 (%ir-block.0): %0:_(p0) = G_BLOCK_ADDR blockaddress(@test_blockaddress, %ir-block.block) %1:_(p0) = G_GLOBAL_VALUE @addr G_STORE %0(p0), %1(p0) :: (store (p0) into @addr) G_BRINDIRECT %0(p0) - bb.2.block (address-taken): + bb.2.block (ir-block-address-taken %ir-block.block): RET_ReallyLR ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir @@ -30,32 +30,34 @@ body: | ; CHECK-LABEL: name: test_blockaddress ; CHECK: bb.0 (%ir-block.0): - ; CHECK: [[MOVaddrBA:%[0-9]+]]:gpr64common = MOVaddrBA target-flags(aarch64-page) blockaddress(@test_blockaddress, %ir-block.block), target-flags(aarch64-pageoff, aarch64-nc) blockaddress(@test_blockaddress, %ir-block.block) - ; CHECK: [[MOVaddr:%[0-9]+]]:gpr64common = MOVaddr target-flags(aarch64-page) @addr, target-flags(aarch64-pageoff, aarch64-nc) @addr - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY [[MOVaddrBA]] - ; CHECK: STRXui [[COPY]], [[MOVaddr]], 0 :: (store (p0) into @addr) - ; CHECK: BR [[MOVaddrBA]] - ; CHECK: bb.1.block (address-taken): + ; CHECK-NEXT: [[MOVaddrBA:%[0-9]+]]:gpr64common = MOVaddrBA target-flags(aarch64-page) 
blockaddress(@test_blockaddress, %ir-block.block), target-flags(aarch64-pageoff, aarch64-nc) blockaddress(@test_blockaddress, %ir-block.block) + ; CHECK-NEXT: [[MOVaddr:%[0-9]+]]:gpr64common = MOVaddr target-flags(aarch64-page) @addr, target-flags(aarch64-pageoff, aarch64-nc) @addr + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY [[MOVaddrBA]] + ; CHECK-NEXT: STRXui [[COPY]], [[MOVaddr]], 0 :: (store (p0) into @addr) + ; CHECK-NEXT: BR [[MOVaddrBA]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.block (ir-block-address-taken %ir-block.block): ; LARGE-LABEL: name: test_blockaddress ; LARGE: bb.0 (%ir-block.0): - ; LARGE: [[MOVZXi:%[0-9]+]]:gpr64 = MOVZXi target-flags(aarch64-g0, aarch64-nc) blockaddress(@test_blockaddress, %ir-block.block), 0 - ; LARGE: [[MOVKXi:%[0-9]+]]:gpr64 = MOVKXi [[MOVZXi]], target-flags(aarch64-g1, aarch64-nc) blockaddress(@test_blockaddress, %ir-block.block), 16 - ; LARGE: [[MOVKXi1:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi]], target-flags(aarch64-g2, aarch64-nc) blockaddress(@test_blockaddress, %ir-block.block), 32 - ; LARGE: [[MOVKXi2:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi1]], target-flags(aarch64-g3) blockaddress(@test_blockaddress, %ir-block.block), 48 - ; LARGE: [[MOVZXi1:%[0-9]+]]:gpr64 = MOVZXi target-flags(aarch64-g0, aarch64-nc) @addr, 0 - ; LARGE: [[MOVKXi3:%[0-9]+]]:gpr64 = MOVKXi [[MOVZXi1]], target-flags(aarch64-g1, aarch64-nc) @addr, 16 - ; LARGE: [[MOVKXi4:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi3]], target-flags(aarch64-g2, aarch64-nc) @addr, 32 - ; LARGE: [[MOVKXi5:%[0-9]+]]:gpr64common = MOVKXi [[MOVKXi4]], target-flags(aarch64-g3) @addr, 48 - ; LARGE: STRXui [[MOVKXi2]], [[MOVKXi5]], 0 :: (store (p0) into @addr) - ; LARGE: BR [[MOVKXi2]] - ; LARGE: bb.1.block (address-taken): + ; LARGE-NEXT: [[MOVZXi:%[0-9]+]]:gpr64 = MOVZXi target-flags(aarch64-g0, aarch64-nc) blockaddress(@test_blockaddress, %ir-block.block), 0 + ; LARGE-NEXT: [[MOVKXi:%[0-9]+]]:gpr64 = MOVKXi [[MOVZXi]], target-flags(aarch64-g1, aarch64-nc) blockaddress(@test_blockaddress, 
%ir-block.block), 16 + ; LARGE-NEXT: [[MOVKXi1:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi]], target-flags(aarch64-g2, aarch64-nc) blockaddress(@test_blockaddress, %ir-block.block), 32 + ; LARGE-NEXT: [[MOVKXi2:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi1]], target-flags(aarch64-g3) blockaddress(@test_blockaddress, %ir-block.block), 48 + ; LARGE-NEXT: [[MOVZXi1:%[0-9]+]]:gpr64 = MOVZXi target-flags(aarch64-g0, aarch64-nc) @addr, 0 + ; LARGE-NEXT: [[MOVKXi3:%[0-9]+]]:gpr64 = MOVKXi [[MOVZXi1]], target-flags(aarch64-g1, aarch64-nc) @addr, 16 + ; LARGE-NEXT: [[MOVKXi4:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi3]], target-flags(aarch64-g2, aarch64-nc) @addr, 32 + ; LARGE-NEXT: [[MOVKXi5:%[0-9]+]]:gpr64common = MOVKXi [[MOVKXi4]], target-flags(aarch64-g3) @addr, 48 + ; LARGE-NEXT: STRXui [[MOVKXi2]], [[MOVKXi5]], 0 :: (store (p0) into @addr) + ; LARGE-NEXT: BR [[MOVKXi2]] + ; LARGE-NEXT: {{ $}} + ; LARGE-NEXT: bb.1.block (ir-block-address-taken %ir-block.block): bb.1 (%ir-block.0): %0:gpr(p0) = G_BLOCK_ADDR blockaddress(@test_blockaddress, %ir-block.block) %1:gpr(p0) = G_GLOBAL_VALUE @addr G_STORE %0(p0), %1(p0) :: (store (p0) into @addr) G_BRINDIRECT %0(p0) - bb.2.block (address-taken): + bb.2.block (ir-block-address-taken %ir-block.block): RET_ReallyLR ... diff --git a/llvm/test/CodeGen/AArch64/arm64-csel.ll b/llvm/test/CodeGen/AArch64/arm64-csel.ll --- a/llvm/test/CodeGen/AArch64/arm64-csel.ll +++ b/llvm/test/CodeGen/AArch64/arm64-csel.ll @@ -292,6 +292,32 @@ ret i64 %. } +; Regression test for FalseVal - TrueVal overflow +define i64 @foo18_overflow3(i1 %cmp) nounwind readnone optsize ssp { +; CHECK-LABEL: foo18_overflow3: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov x8, #-9223372036854775808 +; CHECK-NEXT: tst w0, #0x1 +; CHECK-NEXT: csel x0, x8, xzr, ne +; CHECK-NEXT: ret +entry: + %. = select i1 %cmp, i64 -9223372036854775808, i64 0 + ret i64 %. 
+} + +; Regression test for TrueVal - FalseVal overflow +define i64 @foo18_overflow4(i1 %cmp) nounwind readnone optsize ssp { +; CHECK-LABEL: foo18_overflow4: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov x8, #-9223372036854775808 +; CHECK-NEXT: tst w0, #0x1 +; CHECK-NEXT: csel x0, xzr, x8, ne +; CHECK-NEXT: ret +entry: + %. = select i1 %cmp, i64 0, i64 -9223372036854775808 + ret i64 %. +} + define i64 @foo19(i64 %a, i64 %b, i64 %c) { ; CHECK-LABEL: foo19: ; CHECK: // %bb.0: // %entry diff --git a/llvm/test/CodeGen/AArch64/branch-target-enforcement.mir b/llvm/test/CodeGen/AArch64/branch-target-enforcement.mir deleted file mode 100644 --- a/llvm/test/CodeGen/AArch64/branch-target-enforcement.mir +++ /dev/null @@ -1,371 +0,0 @@ -# RUN: llc -run-pass=aarch64-branch-targets -mattr=+pauth %s -o - | FileCheck %s ---- | - target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" - target triple = "aarch64-arm-none-eabi" - - define hidden i32 @simple_external() "branch-target-enforcement"="true" { - entry: - ret i32 0 - } - - define internal i32 @simple_internal() "branch-target-enforcement"="true" { - entry: - ret i32 0 - } - - define hidden i32 @ptr_auth() "branch-target-enforcement"="true" { - entry: - tail call void asm sideeffect "", "~{lr}"() - ret i32 0 - } - - define hidden i32 @ptr_auth_b() "branch-target-enforcement"="true" { - entry: - tail call void asm sideeffect "", "~{lr}"() - ret i32 0 - } - - define hidden i32 @jump_table(i32 %a) "branch-target-enforcement"="true" { - entry: - switch i32 %a, label %sw.epilog [ - i32 1, label %sw.bb - i32 2, label %sw.bb1 - i32 3, label %sw.bb2 - i32 4, label %sw.bb3 - i32 5, label %sw.bb4 - ] - - sw.bb: ; preds = %entry - tail call void asm sideeffect "", ""() - br label %sw.epilog - - sw.bb1: ; preds = %entry - tail call void asm sideeffect "", ""() - br label %sw.epilog - - sw.bb2: ; preds = %entry - tail call void asm sideeffect "", ""() - br label %sw.epilog - - sw.bb3: ; preds = %entry - tail call 
void asm sideeffect "", ""() - br label %sw.epilog - - sw.bb4: ; preds = %entry - tail call void asm sideeffect "", ""() - br label %sw.epilog - - sw.epilog: ; preds = %entry, %sw.bb4, %sw.bb3, %sw.bb2, %sw.bb1, %sw.bb - ret i32 0 - } - - @label_address.addr = internal unnamed_addr global i8* blockaddress(@label_address, %return), align 8 - - define hidden i32 @label_address() "branch-target-enforcement"="true" { - entry: - %0 = load i8*, i8** @label_address.addr, align 8 - indirectbr i8* %0, [label %return, label %lab2] - - lab2: ; preds = %entry - br label %.split - - return: ; preds = %entry - br label %.split - - .split: ; preds = %lab2, %return - %merge = phi i8* [ blockaddress(@label_address, %lab2), %return ], [ blockaddress(@label_address, %return), %lab2 ] - %merge2 = phi i32 [ 1, %return ], [ 2, %lab2 ] - store i8* %merge, i8** @label_address.addr, align 8 - ret i32 %merge2 - } - - define hidden i32 @label_address_entry() "branch-target-enforcement"="true" { - entry: - %0 = load i8*, i8** @label_address.addr, align 8 - indirectbr i8* %0, [label %return, label %lab2] - - lab2: ; preds = %entry - br label %.split - - return: ; preds = %entry - br label %.split - - .split: ; preds = %lab2, %return - %merge = phi i8* [ blockaddress(@label_address, %lab2), %return ], [ blockaddress(@label_address, %return), %lab2 ] - %merge2 = phi i32 [ 1, %return ], [ 2, %lab2 ] - store i8* %merge, i8** @label_address.addr, align 8 - ret i32 %merge2 - } - - define hidden i32 @debug_ptr_auth() "branch-target-enforcement"="true" { - entry: - tail call void asm sideeffect "", "~{lr}"() - ret i32 0 - } - -... ---- -# External function, could be addres-taken elsewhere so needs BTI JC. 
-name: simple_external -body: | - bb.0.entry: - ; CHECK-LABEL: name: simple_external - ; CHECK: HINT 34 - ; CHECK: RET - $w0 = ORRWrs $wzr, $wzr, 0 - RET undef $lr, implicit killed $w0 - ---- -# Internal function, not address-taken in this module, however the compiler -# cannot 100% ensure that later parts of the toolchain won't add indirect -# jumps. E.g. a linker adding a thunk to extend the range of a direct jump. -# Therefore, even this case needs a BTI. -name: simple_internal -body: | - bb.0.entry: - ; CHECK-LABEL: name: simple_internal - ; CHECK: HINT 34 - ; CHECK: RET - $w0 = ORRWrs $wzr, $wzr, 0 - RET undef $lr, implicit killed $w0 - ---- -# Function starts with PACIASP, which implicitly acts as BTI JC, so no change -# needed. -name: ptr_auth -stack: - - { id: 0, name: '', type: spill-slot, offset: -16, size: 8, alignment: 16, - stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -body: | - bb.0.entry: - liveins: $lr - - ; CHECK-LABEL: name: ptr_auth - ; CHECK-NOT: HINT - ; CHECK: frame-setup PACIASP - ; CHECK-NOT: HINT - ; CHECK: RETAA - frame-setup PACIASP implicit-def $lr, implicit killed $lr, implicit $sp - early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0) - INLINEASM &"", 1, 12, implicit-def dead early-clobber $lr - $w0 = ORRWrs $wzr, $wzr, 0 - early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0) - RETAA implicit $sp, implicit $lr, implicit killed $w0 - ---- -# Function starts with PACIBSP, which implicitly acts as BTI JC, so no change -# needed. 
-name: ptr_auth_b -stack: - - { id: 0, name: '', type: spill-slot, offset: -16, size: 8, alignment: 16, - stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -body: | - bb.0.entry: - liveins: $lr - - ; CHECK-LABEL: name: ptr_auth_b - ; CHECK-NOT: HINT - ; CHECK: frame-setup PACIBSP - ; CHECK-NOT: HINT - ; CHECK: RETAB - frame-setup PACIBSP implicit-def $lr, implicit killed $lr, implicit $sp - early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0) - INLINEASM &"", 1, 12, implicit-def dead early-clobber $lr - $w0 = ORRWrs $wzr, $wzr, 0 - early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0) - RETAB implicit $sp, implicit $lr, implicit killed $w0 - ---- -# Function contains a jump table, so every target of the jump table must start -# with BTI J. -name: jump_table -jumpTable: - kind: block-address - entries: - - id: 0 - blocks: [ '%bb.2', '%bb.3', '%bb.4', '%bb.5', '%bb.6' ] -body: | - bb.0.entry: - ; CHECK-LABEL: name: jump_table - ; CHECK: HINT 34 - successors: %bb.7(0x15555555), %bb.1(0x6aaaaaab) - liveins: $w0 - - renamable $w8 = SUBWri killed renamable $w0, 1, 0, implicit-def $x8 - dead $wzr = SUBSWri renamable $w8, 4, 0, implicit-def $nzcv - Bcc 8, %bb.7, implicit $nzcv - - bb.1.entry: - ; CHECK: bb.1.entry: - ; CHECK-NOT: HINT - ; CHECK: BR killed renamable $x8 - successors: %bb.2(0x1999999a), %bb.3(0x1999999a), %bb.4(0x1999999a), %bb.5(0x1999999a), %bb.6(0x1999999a) - liveins: $x8 - - $x9 = ADRP target-flags(aarch64-page) %jump-table.0 - renamable $x9 = ADDXri killed $x9, target-flags(aarch64-pageoff, aarch64-nc) %jump-table.0, 0 - renamable $x8 = LDRXroX killed renamable $x9, killed renamable $x8, 0, 1 :: (load (s64) from jump-table) - BR killed renamable $x8 - - bb.2.sw.bb: - ; CHECK: bb.2.sw.bb - ; CHECK-NEXT: HINT 36 - $w0 = ORRWrs $wzr, $wzr, 0 - INLINEASM &"", 1 - RET undef $lr, 
implicit killed $w0 - - bb.3.sw.bb1: - ; CHECK: bb.3.sw.bb1 - ; CHECK-NEXT: HINT 36 - $w0 = ORRWrs $wzr, $wzr, 0 - INLINEASM &"", 1 - RET undef $lr, implicit killed $w0 - - bb.4.sw.bb2: - ; CHECK: bb.4.sw.bb2 - ; CHECK-NEXT: HINT 36 - $w0 = ORRWrs $wzr, $wzr, 0 - INLINEASM &"", 1 - RET undef $lr, implicit killed $w0 - - bb.5.sw.bb3: - ; CHECK: bb.5.sw.bb3 - ; CHECK-NEXT: HINT 36 - $w0 = ORRWrs $wzr, $wzr, 0 - INLINEASM &"", 1 - RET undef $lr, implicit killed $w0 - - bb.6.sw.bb4: - ; CHECK: bb.6.sw.bb4 - ; CHECK-NEXT: successors: %bb.7(0x80000000) - ; CHECK-NEXT: {{ }} - ; CHECK-NEXT: HINT 36 - successors: %bb.7(0x80000000) - - INLINEASM &"", 1 - - bb.7.sw.epilog: - ; CHECK: bb.7.sw.epilog: - ; CHECK-NOT: HINT - ; CHECK: RET - $w0 = ORRWrs $wzr, $wzr, 0 - RET undef $lr, implicit killed $w0 - ---- -# Function takes address of basic blocks, so they must start with BTI J. -name: label_address -body: | - bb.0.entry: - ; CHECK-LABEL: label_address - ; CHECK: bb.0.entry: - ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK-NEXT: {{ }} - ; CHECK-NEXT: HINT 34 - ; CHECK: BR killed renamable $x9 - successors: %bb.1(0x40000000), %bb.2(0x40000000) - - renamable $x8 = ADRP target-flags(aarch64-page) @label_address.addr - renamable $x9 = LDRXui renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @label_address.addr :: (dereferenceable load (s64) from @label_address.addr) - BR killed renamable $x9 - - bb.1.return (address-taken): - ; CHECK: bb.1.return (address-taken): - ; CHECK-NEXT: liveins: - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: HINT 36 - liveins: $x8 - - $x9 = ADRP target-flags(aarch64-page) blockaddress(@label_address, %ir-block.lab2) - renamable $w0 = ORRWri $wzr, 0 - renamable $x9 = ADDXri killed $x9, target-flags(aarch64-pageoff, aarch64-nc) blockaddress(@label_address, %ir-block.lab2), 0 - STRXui killed renamable $x9, killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @label_address.addr :: (store (s64) into @label_address.addr) - 
RET undef $lr, implicit killed $w0 - - bb.2.lab2 (address-taken): - ; CHECK: bb.2.lab2 (address-taken): - ; CHECK-NEXT: liveins: - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: HINT 36 - liveins: $x8 - - $x9 = ADRP target-flags(aarch64-page) blockaddress(@label_address, %ir-block.return) - renamable $w0 = ORRWri $wzr, 1984 - renamable $x9 = ADDXri killed $x9, target-flags(aarch64-pageoff, aarch64-nc) blockaddress(@label_address, %ir-block.return), 0 - STRXui killed renamable $x9, killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @label_address.addr :: (store (s64) into @label_address.addr) - RET undef $lr, implicit killed $w0 - ---- -# Function takes address of the entry block, so the entry block needs a BTI JC. -name: label_address_entry -stack: - - { id: 0, name: '', type: spill-slot, offset: -16, size: 8, alignment: 16, - stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -body: | - bb.0.entry (address-taken): - ; CHECK-LABEL: label_address_entry - ; CHECK: bb.0.entry (address-taken): - ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK-NEXT: {{ }} - ; CHECK-NEXT: HINT 38 - ; CHECK: BR killed renamable $x9 - successors: %bb.1(0x40000000), %bb.2(0x40000000) - - renamable $x8 = ADRP target-flags(aarch64-page) @label_address.addr - renamable $x9 = LDRXui renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @label_address.addr :: (dereferenceable load (s64) from @label_address.addr) - BR killed renamable $x9 - - bb.1.return (address-taken): - ; CHECK: bb.1.return (address-taken): - ; CHECK-NEXT: liveins: - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: HINT 36 - liveins: $x8 - frame-setup PACIASP implicit-def $lr, implicit killed $lr, implicit $sp - frame-setup CFI_INSTRUCTION negate_ra_sign_state - early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0) - INLINEASM &"", 1, 12, implicit-def dead 
early-clobber $lr - $x9 = ADRP target-flags(aarch64-page) blockaddress(@label_address, %ir-block.entry) - renamable $w0 = ORRWri $wzr, 0 - renamable $x9 = ADDXri killed $x9, target-flags(aarch64-pageoff, aarch64-nc) blockaddress(@label_address, %ir-block.entry), 0 - STRXui killed renamable $x9, killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @label_address.addr :: (store (s64) into @label_address.addr) - early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0) - RETAA implicit $sp, implicit $lr, implicit killed $w0 - - bb.2.lab2: - ; CHECK: bb.2.lab2: - ; CHECK-NOT: HINT - liveins: $x8 - - $x9 = ADRP target-flags(aarch64-page) blockaddress(@label_address, %ir-block.return) - renamable $w0 = ORRWri $wzr, 1984 - renamable $x9 = ADDXri killed $x9, target-flags(aarch64-pageoff, aarch64-nc) blockaddress(@label_address, %ir-block.return), 0 - STRXui killed renamable $x9, killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @label_address.addr :: (store (s64) into @label_address.addr) - RET undef $lr, implicit killed $w0 ---- -# When PACIASP is the first real instruction in the functions then BTI should not be inserted. 
-name: debug_ptr_auth -stack: - - { id: 0, name: '', type: spill-slot, offset: -16, size: 8, alignment: 16, - stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -body: | - bb.0.entry: - liveins: $lr - - ; CHECK-LABEL: name: debug_ptr_auth - ; CHECK-NOT: HINT - ; CHECK: frame-setup PACIASP - ; CHECK-NOT: HINT - ; CHECK: RETAA - frame-setup PACIASP implicit-def $lr, implicit killed $lr, implicit $sp - frame-setup CFI_INSTRUCTION negate_ra_sign_state - early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0) - INLINEASM &"", 1, 12, implicit-def dead early-clobber $lr - $w0 = ORRWrs $wzr, $wzr, 0 - early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0) - RETAA implicit $sp, implicit $lr, implicit killed $w0 - -... diff --git a/llvm/test/CodeGen/AArch64/extract-subvec-combine.ll b/llvm/test/CodeGen/AArch64/extract-subvec-combine.ll --- a/llvm/test/CodeGen/AArch64/extract-subvec-combine.ll +++ b/llvm/test/CodeGen/AArch64/extract-subvec-combine.ll @@ -54,9 +54,8 @@ define <2 x i32> @sext_extract_zext_idx0(<4 x i16> %vec) nounwind { ; CHECK-LABEL: sext_extract_zext_idx0: ; CHECK: // %bb.0: -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-NEXT: shl v0.2s, v0.2s, #16 -; CHECK-NEXT: sshr v0.2s, v0.2s, #16 +; CHECK-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %zext = zext <4 x i16> %vec to <4 x i32> %extract = call <2 x i32> @llvm.vector.extract.v2i32.v4i32(<4 x i32> %zext, i64 0) @@ -65,6 +64,21 @@ ret <2 x i32> %sext_inreg } +; Negative test, combine should not fire if sign extension is for a different width. 
+define <2 x i32> @sext_extract_zext_idx0_negtest(<4 x i16> %vec) nounwind { +; CHECK-LABEL: sext_extract_zext_idx0_negtest: +; CHECK: // %bb.0: +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: shl v0.2s, v0.2s, #17 +; CHECK-NEXT: sshr v0.2s, v0.2s, #17 +; CHECK-NEXT: ret + %zext = zext <4 x i16> %vec to <4 x i32> + %extract = call <2 x i32> @llvm.vector.extract.v2i32.v4i32(<4 x i32> %zext, i64 0) + %sext_inreg_step0 = shl <2 x i32> %extract, + %sext_inreg = ashr <2 x i32> %sext_inreg_step0, + ret <2 x i32> %sext_inreg +} + define <4 x i16> @sext_extract_sext_idx0(<8 x i8> %vec) nounwind { ; CHECK-LABEL: sext_extract_sext_idx0: ; CHECK: // %bb.0: @@ -81,10 +95,9 @@ define <2 x i32> @sext_extract_zext_idx2(<4 x i16> %vec) nounwind { ; CHECK-LABEL: sext_extract_zext_idx2: ; CHECK: // %bb.0: -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: sshll v0.4s, v0.4h, #0 ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: shl v0.2s, v0.2s, #16 -; CHECK-NEXT: sshr v0.2s, v0.2s, #16 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %zext = zext <4 x i16> %vec to <4 x i32> %extract = call <2 x i32> @llvm.vector.extract.v2i32.v4i32(<4 x i32> %zext, i64 2) diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat.ll b/llvm/test/CodeGen/AArch64/fpclamptosat.ll --- a/llvm/test/CodeGen/AArch64/fpclamptosat.ll +++ b/llvm/test/CodeGen/AArch64/fpclamptosat.ll @@ -396,10 +396,9 @@ ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: cmp x1, #1 -; CHECK-NEXT: csel x8, x0, xzr, lt ; CHECK-NEXT: csinc x9, x1, xzr, lt -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: cset w10, ne +; CHECK-NEXT: csel x8, x0, xzr, lt +; CHECK-NEXT: cset w10, lt ; CHECK-NEXT: cmp x9, #0 ; CHECK-NEXT: cset w9, gt ; CHECK-NEXT: csel w9, w10, w9, eq @@ -459,10 +458,9 @@ ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: cmp x1, #1 -; CHECK-NEXT: csel x8, x0, xzr, lt ; CHECK-NEXT: csinc x9, x1, xzr, lt -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: 
cset w10, ne +; CHECK-NEXT: csel x8, x0, xzr, lt +; CHECK-NEXT: cset w10, lt ; CHECK-NEXT: cmp x9, #0 ; CHECK-NEXT: cset w9, gt ; CHECK-NEXT: csel w9, w10, w9, eq @@ -528,10 +526,9 @@ ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: bl __fixhfti ; CHECK-NEXT: cmp x1, #1 -; CHECK-NEXT: csel x8, x0, xzr, lt ; CHECK-NEXT: csinc x9, x1, xzr, lt -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: cset w10, ne +; CHECK-NEXT: csel x8, x0, xzr, lt +; CHECK-NEXT: cset w10, lt ; CHECK-NEXT: cmp x9, #0 ; CHECK-NEXT: cset w9, gt ; CHECK-NEXT: csel w9, w10, w9, eq diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll --- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll @@ -397,37 +397,35 @@ ; CHECK-NEXT: .cfi_offset w20, -16 ; CHECK-NEXT: .cfi_offset w30, -32 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov d0, v0.d[1] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: mov x20, x1 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: mov d0, v0.d[1] ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: cmp x1, #1 ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: csel x8, x0, xzr, lt ; CHECK-NEXT: csinc x9, x1, xzr, lt +; CHECK-NEXT: csel x8, x0, xzr, lt +; CHECK-NEXT: cset w10, lt +; CHECK-NEXT: cmp x9, #0 +; CHECK-NEXT: cset w9, gt +; CHECK-NEXT: csel w9, w10, w9, eq ; CHECK-NEXT: cmp x20, #1 -; CHECK-NEXT: csel x10, x19, xzr, lt ; CHECK-NEXT: csinc x11, x20, xzr, lt -; CHECK-NEXT: cmp x10, #0 -; CHECK-NEXT: cset w12, ne +; CHECK-NEXT: csel x10, x19, xzr, lt +; CHECK-NEXT: cset w12, lt ; CHECK-NEXT: cmp x11, #0 ; CHECK-NEXT: cset w11, gt ; CHECK-NEXT: csel w11, w12, w11, eq -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: cset w12, ne -; CHECK-NEXT: cmp x9, #0 -; CHECK-NEXT: cset w9, gt -; CHECK-NEXT: csel w9, w12, w9, eq +; 
CHECK-NEXT: cmp w11, #0 +; CHECK-NEXT: csel x10, x10, xzr, ne ; CHECK-NEXT: cmp w9, #0 ; CHECK-NEXT: csel x8, x8, xzr, ne -; CHECK-NEXT: cmp w11, #0 -; CHECK-NEXT: csel x9, x10, xzr, ne ; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: fmov d0, x10 +; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: ret @@ -511,37 +509,35 @@ ; CHECK-NEXT: .cfi_offset w30, -32 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov s0, v0.s[1] +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: mov x20, x1 -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: mov s0, v0.s[1] ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: cmp x1, #1 ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: csinc x8, x1, xzr, lt -; CHECK-NEXT: csel x9, x0, xzr, lt +; CHECK-NEXT: csinc x9, x1, xzr, lt +; CHECK-NEXT: csel x8, x0, xzr, lt +; CHECK-NEXT: cset w10, lt +; CHECK-NEXT: cmp x9, #0 +; CHECK-NEXT: cset w9, gt +; CHECK-NEXT: csel w9, w10, w9, eq ; CHECK-NEXT: cmp x20, #1 -; CHECK-NEXT: csel x10, x19, xzr, lt ; CHECK-NEXT: csinc x11, x20, xzr, lt -; CHECK-NEXT: cmp x10, #0 -; CHECK-NEXT: cset w12, ne +; CHECK-NEXT: csel x10, x19, xzr, lt +; CHECK-NEXT: cset w12, lt ; CHECK-NEXT: cmp x11, #0 ; CHECK-NEXT: cset w11, gt ; CHECK-NEXT: csel w11, w12, w11, eq -; CHECK-NEXT: cmp x9, #0 -; CHECK-NEXT: cset w12, ne -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: cset w8, gt -; CHECK-NEXT: csel w8, w12, w8, eq -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: csel x8, x9, xzr, ne ; CHECK-NEXT: cmp w11, #0 -; CHECK-NEXT: csel x9, x10, xzr, ne +; CHECK-NEXT: csel x10, x10, xzr, ne +; CHECK-NEXT: cmp w9, #0 +; CHECK-NEXT: csel x8, x8, xzr, ne ; CHECK-NEXT: ldp x20, x19, [sp, #32] // 
16-byte Folded Reload -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: fmov d0, x10 +; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: ret @@ -637,37 +633,35 @@ ; CHECK-NEXT: .cfi_offset w30, -32 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov h0, v0.h[1] +; CHECK-NEXT: // kill: def $h0 killed $h0 killed $q0 ; CHECK-NEXT: bl __fixhfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: mov x20, x1 -; CHECK-NEXT: // kill: def $h0 killed $h0 killed $q0 +; CHECK-NEXT: mov h0, v0.h[1] ; CHECK-NEXT: bl __fixhfti ; CHECK-NEXT: cmp x1, #1 ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: csinc x8, x1, xzr, lt -; CHECK-NEXT: csel x9, x0, xzr, lt +; CHECK-NEXT: csinc x9, x1, xzr, lt +; CHECK-NEXT: csel x8, x0, xzr, lt +; CHECK-NEXT: cset w10, lt +; CHECK-NEXT: cmp x9, #0 +; CHECK-NEXT: cset w9, gt +; CHECK-NEXT: csel w9, w10, w9, eq ; CHECK-NEXT: cmp x20, #1 -; CHECK-NEXT: csel x10, x19, xzr, lt ; CHECK-NEXT: csinc x11, x20, xzr, lt -; CHECK-NEXT: cmp x10, #0 -; CHECK-NEXT: cset w12, ne +; CHECK-NEXT: csel x10, x19, xzr, lt +; CHECK-NEXT: cset w12, lt ; CHECK-NEXT: cmp x11, #0 ; CHECK-NEXT: cset w11, gt ; CHECK-NEXT: csel w11, w12, w11, eq -; CHECK-NEXT: cmp x9, #0 -; CHECK-NEXT: cset w12, ne -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: cset w8, gt -; CHECK-NEXT: csel w8, w12, w8, eq -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: csel x8, x9, xzr, ne ; CHECK-NEXT: cmp w11, #0 -; CHECK-NEXT: csel x9, x10, xzr, ne +; CHECK-NEXT: csel x10, x10, xzr, ne +; CHECK-NEXT: cmp w9, #0 +; CHECK-NEXT: csel x8, x8, xzr, ne ; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: fmov d0, x10 +; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: ret diff --git 
a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll --- a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll @@ -23,7 +23,6 @@ ; CHECK-NEXT: fcvtzs w8, s0 ; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: csel w8, w8, wzr, lt -; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: csinv w8, w8, wzr, ge ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret @@ -201,7 +200,6 @@ ; CHECK-NEXT: fcvtzs w8, d0 ; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: csel w8, w8, wzr, lt -; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: csinv w8, w8, wzr, ge ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret @@ -382,7 +380,6 @@ ; CHECK-CVT-NEXT: fcvtzs w8, s0 ; CHECK-CVT-NEXT: cmp w8, #0 ; CHECK-CVT-NEXT: csel w8, w8, wzr, lt -; CHECK-CVT-NEXT: cmp w8, #0 ; CHECK-CVT-NEXT: csinv w8, w8, wzr, ge ; CHECK-CVT-NEXT: and w0, w8, #0x1 ; CHECK-CVT-NEXT: ret @@ -392,7 +389,6 @@ ; CHECK-FP16-NEXT: fcvtzs w8, h0 ; CHECK-FP16-NEXT: cmp w8, #0 ; CHECK-FP16-NEXT: csel w8, w8, wzr, lt -; CHECK-FP16-NEXT: cmp w8, #0 ; CHECK-FP16-NEXT: csinv w8, w8, wzr, ge ; CHECK-FP16-NEXT: and w0, w8, #0x1 ; CHECK-FP16-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll --- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -1306,11 +1306,9 @@ ; CHECK-NEXT: fcvtzs w8, d1 ; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: csel w8, w8, wzr, lt -; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: csinv w8, w8, wzr, ge ; CHECK-NEXT: cmp w9, #0 ; CHECK-NEXT: csel w9, w9, wzr, lt -; CHECK-NEXT: cmp w9, #0 ; CHECK-NEXT: csinv w9, w9, wzr, ge ; CHECK-NEXT: fmov s0, w9 ; CHECK-NEXT: mov v0.s[1], w8 @@ -2062,57 +2060,49 @@ ; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h ; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h ; CHECK-CVT-NEXT: mov s2, v1.s[1] +; CHECK-CVT-NEXT: mov s3, v1.s[2] ; CHECK-CVT-NEXT: fcvtzs w9, s1 +; CHECK-CVT-NEXT: mov s1, v1.s[3] ; CHECK-CVT-NEXT: fcvtzs w13, s0 ; CHECK-CVT-NEXT: fcvtzs 
w8, s2 -; CHECK-CVT-NEXT: mov s2, v1.s[2] -; CHECK-CVT-NEXT: mov s1, v1.s[3] +; CHECK-CVT-NEXT: mov s2, v0.s[1] +; CHECK-CVT-NEXT: fcvtzs w10, s3 +; CHECK-CVT-NEXT: fcvtzs w11, s1 +; CHECK-CVT-NEXT: mov s1, v0.s[2] +; CHECK-CVT-NEXT: mov s0, v0.s[3] ; CHECK-CVT-NEXT: cmp w8, #0 ; CHECK-CVT-NEXT: csel w8, w8, wzr, lt -; CHECK-CVT-NEXT: fcvtzs w10, s2 -; CHECK-CVT-NEXT: cmp w8, #0 -; CHECK-CVT-NEXT: mov s2, v0.s[1] +; CHECK-CVT-NEXT: fcvtzs w12, s2 ; CHECK-CVT-NEXT: csinv w8, w8, wzr, ge ; CHECK-CVT-NEXT: cmp w9, #0 ; CHECK-CVT-NEXT: csel w9, w9, wzr, lt -; CHECK-CVT-NEXT: fcvtzs w11, s1 -; CHECK-CVT-NEXT: cmp w9, #0 -; CHECK-CVT-NEXT: mov s1, v0.s[2] ; CHECK-CVT-NEXT: csinv w9, w9, wzr, ge ; CHECK-CVT-NEXT: cmp w10, #0 ; CHECK-CVT-NEXT: csel w10, w10, wzr, lt -; CHECK-CVT-NEXT: fcvtzs w12, s2 -; CHECK-CVT-NEXT: cmp w10, #0 -; CHECK-CVT-NEXT: mov s0, v0.s[3] ; CHECK-CVT-NEXT: csinv w10, w10, wzr, ge ; CHECK-CVT-NEXT: cmp w11, #0 ; CHECK-CVT-NEXT: csel w11, w11, wzr, lt ; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: cmp w11, #0 ; CHECK-CVT-NEXT: csinv w11, w11, wzr, ge ; CHECK-CVT-NEXT: cmp w12, #0 ; CHECK-CVT-NEXT: csel w12, w12, wzr, lt -; CHECK-CVT-NEXT: cmp w12, #0 +; CHECK-CVT-NEXT: fcvtzs w9, s1 ; CHECK-CVT-NEXT: csinv w12, w12, wzr, ge ; CHECK-CVT-NEXT: cmp w13, #0 ; CHECK-CVT-NEXT: csel w13, w13, wzr, lt -; CHECK-CVT-NEXT: cmp w13, #0 -; CHECK-CVT-NEXT: csinv w9, w13, wzr, ge -; CHECK-CVT-NEXT: fcvtzs w13, s1 +; CHECK-CVT-NEXT: csinv w13, w13, wzr, ge +; CHECK-CVT-NEXT: cmp w9, #0 ; CHECK-CVT-NEXT: mov v2.s[1], w8 -; CHECK-CVT-NEXT: fmov s1, w9 -; CHECK-CVT-NEXT: cmp w13, #0 -; CHECK-CVT-NEXT: csel w8, w13, wzr, lt +; CHECK-CVT-NEXT: csel w8, w9, wzr, lt ; CHECK-CVT-NEXT: fcvtzs w9, s0 -; CHECK-CVT-NEXT: cmp w8, #0 -; CHECK-CVT-NEXT: mov v1.s[1], w12 ; CHECK-CVT-NEXT: csinv w8, w8, wzr, ge +; CHECK-CVT-NEXT: fmov s1, w13 ; CHECK-CVT-NEXT: cmp w9, #0 -; CHECK-CVT-NEXT: csel w9, w9, wzr, lt +; CHECK-CVT-NEXT: mov v1.s[1], w12 ; CHECK-CVT-NEXT: mov v2.s[2], w10 
-; CHECK-CVT-NEXT: cmp w9, #0 ; CHECK-CVT-NEXT: mov v1.s[2], w8 -; CHECK-CVT-NEXT: csinv w8, w9, wzr, ge +; CHECK-CVT-NEXT: csel w8, w9, wzr, lt +; CHECK-CVT-NEXT: csinv w8, w8, wzr, ge ; CHECK-CVT-NEXT: mov v2.s[3], w11 ; CHECK-CVT-NEXT: mov v1.s[3], w8 ; CHECK-CVT-NEXT: uzp1 v0.8h, v1.8h, v2.8h diff --git a/llvm/test/CodeGen/AArch64/i128-math.ll b/llvm/test/CodeGen/AArch64/i128-math.ll --- a/llvm/test/CodeGen/AArch64/i128-math.ll +++ b/llvm/test/CodeGen/AArch64/i128-math.ll @@ -68,10 +68,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adds x8, x0, x2 ; CHECK-NEXT: adcs x9, x1, x3 -; CHECK-NEXT: cset w10, hs -; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: csinv x0, x8, xzr, eq -; CHECK-NEXT: csinv x1, x9, xzr, eq +; CHECK-NEXT: csinv x0, x8, xzr, lo +; CHECK-NEXT: csinv x1, x9, xzr, lo ; CHECK-NEXT: ret %1 = tail call i128 @llvm.uadd.sat.i128(i128 %x, i128 %y) ret i128 %1 @@ -126,10 +124,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x0, x2 ; CHECK-NEXT: sbcs x9, x1, x3 -; CHECK-NEXT: cset w10, lo -; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: csel x0, xzr, x8, ne -; CHECK-NEXT: csel x1, xzr, x9, ne +; CHECK-NEXT: csel x0, xzr, x8, lo +; CHECK-NEXT: csel x1, xzr, x9, lo ; CHECK-NEXT: ret %1 = tail call i128 @llvm.usub.sat.i128(i128 %x, i128 %y) ret i128 %1 @@ -185,11 +181,9 @@ ; CHECK-NEXT: adds x8, x0, x2 ; CHECK-NEXT: adcs x9, x1, x3 ; CHECK-NEXT: asr x10, x9, #63 -; CHECK-NEXT: cset w11, vs -; CHECK-NEXT: cmp w11, #0 ; CHECK-NEXT: eor x11, x10, #0x8000000000000000 -; CHECK-NEXT: csel x0, x10, x8, ne -; CHECK-NEXT: csel x1, x11, x9, ne +; CHECK-NEXT: csel x0, x10, x8, vs +; CHECK-NEXT: csel x1, x11, x9, vs ; CHECK-NEXT: ret %1 = tail call i128 @llvm.sadd.sat.i128(i128 %x, i128 %y) ret i128 %1 @@ -245,11 +239,9 @@ ; CHECK-NEXT: subs x8, x0, x2 ; CHECK-NEXT: sbcs x9, x1, x3 ; CHECK-NEXT: asr x10, x9, #63 -; CHECK-NEXT: cset w11, vs -; CHECK-NEXT: cmp w11, #0 ; CHECK-NEXT: eor x11, x10, #0x8000000000000000 -; CHECK-NEXT: csel x0, x10, x8, ne -; CHECK-NEXT: csel x1, x11, x9, ne +; 
CHECK-NEXT: csel x0, x10, x8, vs +; CHECK-NEXT: csel x1, x11, x9, vs ; CHECK-NEXT: ret %1 = tail call i128 @llvm.ssub.sat.i128(i128 %x, i128 %y) ret i128 %1 diff --git a/llvm/test/CodeGen/AArch64/i256-math.ll b/llvm/test/CodeGen/AArch64/i256-math.ll --- a/llvm/test/CodeGen/AArch64/i256-math.ll +++ b/llvm/test/CodeGen/AArch64/i256-math.ll @@ -70,12 +70,10 @@ ; CHECK-NEXT: adcs x9, x1, x5 ; CHECK-NEXT: adcs x10, x2, x6 ; CHECK-NEXT: adcs x11, x3, x7 -; CHECK-NEXT: cset w12, hs -; CHECK-NEXT: cmp w12, #0 -; CHECK-NEXT: csinv x0, x8, xzr, eq -; CHECK-NEXT: csinv x1, x9, xzr, eq -; CHECK-NEXT: csinv x2, x10, xzr, eq -; CHECK-NEXT: csinv x3, x11, xzr, eq +; CHECK-NEXT: csinv x0, x8, xzr, lo +; CHECK-NEXT: csinv x1, x9, xzr, lo +; CHECK-NEXT: csinv x2, x10, xzr, lo +; CHECK-NEXT: csinv x3, x11, xzr, lo ; CHECK-NEXT: ret %1 = tail call i256 @llvm.uadd.sat.i256(i256 %x, i256 %y) ret i256 %1 @@ -138,12 +136,10 @@ ; CHECK-NEXT: sbcs x9, x1, x5 ; CHECK-NEXT: sbcs x10, x2, x6 ; CHECK-NEXT: sbcs x11, x3, x7 -; CHECK-NEXT: cset w12, lo -; CHECK-NEXT: cmp w12, #0 -; CHECK-NEXT: csel x0, xzr, x8, ne -; CHECK-NEXT: csel x1, xzr, x9, ne -; CHECK-NEXT: csel x2, xzr, x10, ne -; CHECK-NEXT: csel x3, xzr, x11, ne +; CHECK-NEXT: csel x0, xzr, x8, lo +; CHECK-NEXT: csel x1, xzr, x9, lo +; CHECK-NEXT: csel x2, xzr, x10, lo +; CHECK-NEXT: csel x3, xzr, x11, lo ; CHECK-NEXT: ret %1 = tail call i256 @llvm.usub.sat.i256(i256 %x, i256 %y) ret i256 %1 @@ -206,14 +202,12 @@ ; CHECK-NEXT: adcs x9, x1, x5 ; CHECK-NEXT: adcs x10, x2, x6 ; CHECK-NEXT: adcs x11, x3, x7 -; CHECK-NEXT: cset w12, vs -; CHECK-NEXT: asr x13, x11, #63 -; CHECK-NEXT: cmp w12, #0 -; CHECK-NEXT: csel x0, x13, x8, ne -; CHECK-NEXT: eor x8, x13, #0x8000000000000000 -; CHECK-NEXT: csel x1, x13, x9, ne -; CHECK-NEXT: csel x2, x13, x10, ne -; CHECK-NEXT: csel x3, x8, x11, ne +; CHECK-NEXT: asr x12, x11, #63 +; CHECK-NEXT: csel x0, x12, x8, vs +; CHECK-NEXT: eor x8, x12, #0x8000000000000000 +; CHECK-NEXT: csel x1, x12, x9, vs +; 
CHECK-NEXT: csel x2, x12, x10, vs +; CHECK-NEXT: csel x3, x8, x11, vs ; CHECK-NEXT: ret %1 = tail call i256 @llvm.sadd.sat.i256(i256 %x, i256 %y) ret i256 %1 @@ -276,14 +270,12 @@ ; CHECK-NEXT: sbcs x9, x1, x5 ; CHECK-NEXT: sbcs x10, x2, x6 ; CHECK-NEXT: sbcs x11, x3, x7 -; CHECK-NEXT: cset w12, vs -; CHECK-NEXT: asr x13, x11, #63 -; CHECK-NEXT: cmp w12, #0 -; CHECK-NEXT: csel x0, x13, x8, ne -; CHECK-NEXT: eor x8, x13, #0x8000000000000000 -; CHECK-NEXT: csel x1, x13, x9, ne -; CHECK-NEXT: csel x2, x13, x10, ne -; CHECK-NEXT: csel x3, x8, x11, ne +; CHECK-NEXT: asr x12, x11, #63 +; CHECK-NEXT: csel x0, x12, x8, vs +; CHECK-NEXT: eor x8, x12, #0x8000000000000000 +; CHECK-NEXT: csel x1, x12, x9, vs +; CHECK-NEXT: csel x2, x12, x10, vs +; CHECK-NEXT: csel x3, x8, x11, vs ; CHECK-NEXT: ret %1 = tail call i256 @llvm.ssub.sat.i256(i256 %x, i256 %y) ret i256 %1 diff --git a/llvm/test/CodeGen/AArch64/nontemporal-load.ll b/llvm/test/CodeGen/AArch64/nontemporal-load.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/nontemporal-load.ll @@ -0,0 +1,337 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple aarch64-apple-darwin | FileCheck %s + +define <4 x double> @test_ldnp_v4f64(<4 x double>* %A) { +; CHECK-LABEL: test_ldnp_v4f64: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldnp q0, q1, [x0] +; CHECK-NEXT: ret + %lv = load <4 x double>, <4 x double>* %A, align 8, !nontemporal !0 + ret <4 x double> %lv +} + +define <4 x i64> @test_ldnp_v4i64(<4 x i64>* %A) { +; CHECK-LABEL: test_ldnp_v4i64: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldnp q0, q1, [x0] +; CHECK-NEXT: ret + %lv = load <4 x i64>, <4 x i64>* %A, align 8, !nontemporal !0 + ret <4 x i64> %lv +} + +define <8 x i32> @test_ldnp_v8i32(<8 x i32>* %A) { +; CHECK-LABEL: test_ldnp_v8i32: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldnp q0, q1, [x0] +; CHECK-NEXT: ret + %lv = load <8 x i32>, <8 x i32>* %A, align 8, !nontemporal !0 + ret <8 x i32> %lv +} + +define <8 x float> 
@test_ldnp_v8f32(<8 x float>* %A) { +; CHECK-LABEL: test_ldnp_v8f32: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldnp q0, q1, [x0] +; CHECK-NEXT: ret + %lv = load <8 x float>, <8 x float>* %A, align 8, !nontemporal !0 + ret <8 x float> %lv +} + +define <16 x i16> @test_ldnp_v16i16(<16 x i16>* %A) { +; CHECK-LABEL: test_ldnp_v16i16: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldnp q0, q1, [x0] +; CHECK-NEXT: ret + %lv = load <16 x i16>, <16 x i16>* %A, align 8, !nontemporal !0 + ret <16 x i16> %lv +} + +define <16 x half> @test_ldnp_v16f16(<16 x half>* %A) { +; CHECK-LABEL: test_ldnp_v16f16: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldnp q0, q1, [x0] +; CHECK-NEXT: ret + %lv = load <16 x half>, <16 x half>* %A, align 8, !nontemporal !0 + ret <16 x half> %lv +} + +define <32 x i8> @test_ldnp_v32i8(<32 x i8>* %A) { +; CHECK-LABEL: test_ldnp_v32i8: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldnp q0, q1, [x0] +; CHECK-NEXT: ret + %lv = load <32 x i8>, <32 x i8>* %A, align 8, !nontemporal !0 + ret <32 x i8> %lv +} + +define <4 x i32> @test_ldnp_v4i32(<4 x i32>* %A) { +; CHECK-LABEL: test_ldnp_v4i32: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ret + %lv = load<4 x i32>, <4 x i32>* %A, align 8, !nontemporal !0 + ret <4 x i32> %lv +} + +define <4 x float> @test_ldnp_v4f32(<4 x float>* %A) { +; CHECK-LABEL: test_ldnp_v4f32: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ret + %lv = load<4 x float>, <4 x float>* %A, align 8, !nontemporal !0 + ret <4 x float> %lv +} + +define <8 x i16> @test_ldnp_v8i16(<8 x i16>* %A) { +; CHECK-LABEL: test_ldnp_v8i16: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ret + %lv = load <8 x i16>, <8 x i16>* %A, align 8, !nontemporal !0 + ret <8 x i16> %lv +} + +define <16 x i8> @test_ldnp_v16i8(<16 x i8>* %A) { +; CHECK-LABEL: test_ldnp_v16i8: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ret + %lv = load <16 x i8>, <16 x i8>* %A, align 8, !nontemporal !0 + ret <16 x i8> %lv +} +define <2 x double> @test_ldnp_v2f64(<2 x 
double>* %A) { +; CHECK-LABEL: test_ldnp_v2f64: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ret + %lv = load <2 x double>, <2 x double>* %A, align 8, !nontemporal !0 + ret <2 x double> %lv +} + +define <2 x i32> @test_ldnp_v2i32(<2 x i32>* %A) { +; CHECK-LABEL: test_ldnp_v2i32: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ret + %lv = load <2 x i32>, <2 x i32>* %A, align 8, !nontemporal !0 + ret <2 x i32> %lv +} + +define <2 x float> @test_ldnp_v2f32(<2 x float>* %A) { +; CHECK-LABEL: test_ldnp_v2f32: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ret + %lv = load <2 x float>, <2 x float>* %A, align 8, !nontemporal !0 + ret <2 x float> %lv +} + +define <4 x i16> @test_ldnp_v4i16(<4 x i16>* %A) { +; CHECK-LABEL: test_ldnp_v4i16: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ret + %lv = load <4 x i16>, <4 x i16>* %A, align 8, !nontemporal !0 + ret <4 x i16> %lv +} + +define <8 x i8> @test_ldnp_v8i8(<8 x i8>* %A) { +; CHECK-LABEL: test_ldnp_v8i8: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ret + %lv = load <8 x i8>, <8 x i8>* %A, align 8, !nontemporal !0 + ret <8 x i8> %lv +} + +define <1 x double> @test_ldnp_v1f64(<1 x double>* %A) { +; CHECK-LABEL: test_ldnp_v1f64: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ret + %lv = load <1 x double>, <1 x double>* %A, align 8, !nontemporal !0 + ret <1 x double> %lv +} + +define <1 x i64> @test_ldnp_v1i64(<1 x i64>* %A) { +; CHECK-LABEL: test_ldnp_v1i64: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ret + %lv = load <1 x i64>, <1 x i64>* %A, align 8, !nontemporal !0 + ret <1 x i64> %lv +} + +define <32 x i16> @test_ldnp_v32i16(<32 x i16>* %A) { +; CHECK-LABEL: test_ldnp_v32i16: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldnp q0, q1, [x0] +; CHECK-NEXT: ldnp q2, q3, [x0, #32] +; CHECK-NEXT: ret + %lv = load <32 x i16>, <32 x i16>* %A, align 8, !nontemporal !0 + ret <32 x i16> %lv +} + +define <32 x half> 
@test_ldnp_v32f16(<32 x half>* %A) { +; CHECK-LABEL: test_ldnp_v32f16: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldnp q0, q1, [x0] +; CHECK-NEXT: ldnp q2, q3, [x0, #32] +; CHECK-NEXT: ret + %lv = load <32 x half>, <32 x half>* %A, align 8, !nontemporal !0 + ret <32 x half> %lv +} + +define <16 x i32> @test_ldnp_v16i32(<16 x i32>* %A) { +; CHECK-LABEL: test_ldnp_v16i32: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldnp q0, q1, [x0] +; CHECK-NEXT: ldnp q2, q3, [x0, #32] +; CHECK-NEXT: ret + %lv = load <16 x i32>, <16 x i32>* %A, align 8, !nontemporal !0 + ret <16 x i32> %lv +} + +define <16 x float> @test_ldnp_v16f32(<16 x float>* %A) { +; CHECK-LABEL: test_ldnp_v16f32: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldnp q0, q1, [x0] +; CHECK-NEXT: ldnp q2, q3, [x0, #32] +; CHECK-NEXT: ret + %lv = load <16 x float>, <16 x float>* %A, align 8, !nontemporal !0 + ret <16 x float> %lv +} + +define <17 x float> @test_ldnp_v17f32(<17 x float>* %A) { +; CHECK-LABEL: test_ldnp_v17f32: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldp q1, q2, [x0, #32] +; CHECK-NEXT: ldp q3, q4, [x0] +; CHECK-NEXT: ldr s0, [x0, #64] +; CHECK-NEXT: stp q3, q4, [x8] +; CHECK-NEXT: stp q1, q2, [x8, #32] +; CHECK-NEXT: str s0, [x8, #64] +; CHECK-NEXT: ret + %lv = load <17 x float>, <17 x float>* %A, align 8, !nontemporal !0 + ret <17 x float> %lv +} + +define <33 x double> @test_ldnp_v33f64(<33 x double>* %A) { +; CHECK-LABEL: test_ldnp_v33f64: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: ldp q2, q3, [x0, #32] +; CHECK-NEXT: ldp q4, q5, [x0, #64] +; CHECK-NEXT: ldp q6, q7, [x0, #96] +; CHECK-NEXT: ldp q16, q17, [x0, #128] +; CHECK-NEXT: ldp q18, q19, [x0, #160] +; CHECK-NEXT: ldp q21, q22, [x0, #224] +; CHECK-NEXT: ldp q23, q24, [x0, #192] +; CHECK-NEXT: ldr d20, [x0, #256] +; CHECK-NEXT: stp q0, q1, [x8] +; CHECK-NEXT: stp q2, q3, [x8, #32] +; CHECK-NEXT: stp q4, q5, [x8, #64] +; CHECK-NEXT: str d20, [x8, #256] +; CHECK-NEXT: stp q6, q7, [x8, #96] +; CHECK-NEXT: stp q16, q17, [x8, #128] +; CHECK-NEXT: stp q18, q19, 
[x8, #160] +; CHECK-NEXT: stp q23, q24, [x8, #192] +; CHECK-NEXT: stp q21, q22, [x8, #224] +; CHECK-NEXT: ret + %lv = load <33 x double>, <33 x double>* %A, align 8, !nontemporal !0 + ret <33 x double> %lv +} + +define <33 x i8> @test_ldnp_v33i8(<33 x i8>* %A) { +; CHECK-LABEL: test_ldnp_v33i8: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldp q1, q0, [x0] +; CHECK-NEXT: ldrb w9, [x0, #32] +; CHECK-NEXT: stp q1, q0, [x8] +; CHECK-NEXT: strb w9, [x8, #32] +; CHECK-NEXT: ret + %lv = load<33 x i8>, <33 x i8>* %A, align 8, !nontemporal !0 + ret <33 x i8> %lv +} + +define <4 x i65> @test_ldnp_v4i65(<4 x i65>* %A) { +; CHECK-LABEL: test_ldnp_v4i65: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldp x8, x9, [x0, #8] +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr x10, [x0, #24] +; CHECK-NEXT: and x1, x8, #0x1 +; CHECK-NEXT: ldrb w11, [x0, #32] +; CHECK-NEXT: extr x2, x9, x8, #1 +; CHECK-NEXT: extr x4, x10, x9, #2 +; CHECK-NEXT: extr x6, x11, x10, #3 +; CHECK-NEXT: ubfx x3, x9, #1, #1 +; CHECK-NEXT: mov.d v0[1], x1 +; CHECK-NEXT: ubfx x5, x10, #2, #1 +; CHECK-NEXT: ubfx x7, x11, #3, #1 +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret + %lv = load <4 x i65>, <4 x i65>* %A, align 8, !nontemporal !0 + ret <4 x i65> %lv +} + +define <4 x i63> @test_ldnp_v4i63(<4 x i63>* %A) { +; CHECK-LABEL: test_ldnp_v4i63: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldp x8, x9, [x0] +; CHECK-NEXT: ldp x10, x11, [x0, #16] +; CHECK-NEXT: extr x12, x9, x8, #63 +; CHECK-NEXT: and x0, x8, #0x7fffffffffffffff +; CHECK-NEXT: extr x9, x10, x9, #62 +; CHECK-NEXT: extr x10, x11, x10, #61 +; CHECK-NEXT: and x1, x12, #0x7fffffffffffffff +; CHECK-NEXT: and x2, x9, #0x7fffffffffffffff +; CHECK-NEXT: and x3, x10, #0x7fffffffffffffff +; CHECK-NEXT: ret + %lv = load <4 x i63>, <4 x i63>* %A, align 8, !nontemporal !0 + ret <4 x i63> %lv +} + +define <5 x double> @test_ldnp_v5f64(<5 x double>* %A) { +; CHECK-LABEL: test_ldnp_v5f64: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldp q0, q2, [x0] +; CHECK-NEXT: ext.16b v1, v0, v0, #8 +; CHECK-NEXT: ; kill: def 
$d0 killed $d0 killed $q0 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q1 +; CHECK-NEXT: ext.16b v3, v2, v2, #8 +; CHECK-NEXT: ldr d4, [x0, #32] +; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q2 +; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q3 +; CHECK-NEXT: ; kill: def $d4 killed $d4 killed $q4 +; CHECK-NEXT: ret + %lv = load<5 x double>, <5 x double>* %A, align 8, !nontemporal !0 + ret <5 x double> %lv +} + +define <16 x i64> @test_ldnp_v16i64(<16 x i64>* %A) { +; CHECK-LABEL: test_ldnp_v16i64: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldnp q0, q1, [x0] +; CHECK-NEXT: ldnp q2, q3, [x0, #32] +; CHECK-NEXT: ldnp q4, q5, [x0, #64] +; CHECK-NEXT: ldnp q6, q7, [x0, #96] +; CHECK-NEXT: ret + %lv = load <16 x i64>, <16 x i64>* %A, align 8, !nontemporal !0 + ret <16 x i64> %lv +} + +define <16 x double> @test_ldnp_v16f64(<16 x double>* %A) { +; CHECK-LABEL: test_ldnp_v16f64: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldnp q0, q1, [x0] +; CHECK-NEXT: ldnp q2, q3, [x0, #32] +; CHECK-NEXT: ldnp q4, q5, [x0, #64] +; CHECK-NEXT: ldnp q6, q7, [x0, #96] +; CHECK-NEXT: ret + %lv = load <16 x double>, <16 x double>* %A, align 8, !nontemporal !0 + ret <16 x double> %lv +} + + +!0 = !{i32 1} diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll --- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll @@ -352,20 +352,16 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adds x8, x2, x6 ; CHECK-NEXT: adcs x9, x3, x7 -; CHECK-NEXT: cset w10, vs -; CHECK-NEXT: asr x11, x9, #63 -; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: csel x2, x11, x8, ne -; CHECK-NEXT: eor x8, x11, #0x8000000000000000 -; CHECK-NEXT: csel x3, x8, x9, ne +; CHECK-NEXT: asr x10, x9, #63 +; CHECK-NEXT: csel x2, x10, x8, vs +; CHECK-NEXT: eor x8, x10, #0x8000000000000000 +; CHECK-NEXT: csel x3, x8, x9, vs ; CHECK-NEXT: adds x8, x0, x4 ; CHECK-NEXT: adcs x9, x1, x5 -; CHECK-NEXT: cset w10, vs -; CHECK-NEXT: asr x11, x9, #63 -; CHECK-NEXT: cmp w10, #0 -; 
CHECK-NEXT: eor x10, x11, #0x8000000000000000 -; CHECK-NEXT: csel x8, x11, x8, ne -; CHECK-NEXT: csel x1, x10, x9, ne +; CHECK-NEXT: asr x10, x9, #63 +; CHECK-NEXT: csel x8, x10, x8, vs +; CHECK-NEXT: eor x10, x10, #0x8000000000000000 +; CHECK-NEXT: csel x1, x10, x9, vs ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 diff --git a/llvm/test/CodeGen/AArch64/speculation-hardening-sls.mir b/llvm/test/CodeGen/AArch64/speculation-hardening-sls.mir --- a/llvm/test/CodeGen/AArch64/speculation-hardening-sls.mir +++ b/llvm/test/CodeGen/AArch64/speculation-hardening-sls.mir @@ -65,11 +65,11 @@ ; ISBDSB-NEXT: isb ; SB-NEXT: {{ sb$}} - bb.1.l2 (address-taken): + bb.1.l2 (ir-block-address-taken %ir-block.l2): renamable $w0 = MOVZWi 1, 0 RET undef $lr, implicit $w0 - bb.2.return (address-taken): + bb.2.return (ir-block-address-taken %ir-block.return): $w0 = ORRWrs $wzr, $wzr, 0 RET undef $lr, implicit $w0 ... @@ -90,11 +90,11 @@ ; ISBDSB-NEXT: isb ; SB-NEXT: {{ sb$}} - bb.1.l2 (address-taken): + bb.1.l2 (ir-block-address-taken %ir-block.l2): renamable $w0 = MOVZWi 1, 0 RET undef $lr, implicit $w0 - bb.2.return (address-taken): + bb.2.return (ir-block-address-taken %ir-block.return): $w0 = ORRWrs $wzr, $wzr, 0 RET undef $lr, implicit $w0 ... @@ -115,11 +115,11 @@ ; ISBDSB-NEXT: isb ; SB-NEXT: {{ sb$}} - bb.1.l2 (address-taken): + bb.1.l2 (ir-block-address-taken %ir-block.l2): renamable $w0 = MOVZWi 1, 0 RET undef $lr, implicit $w0 - bb.2.return (address-taken): + bb.2.return (ir-block-address-taken %ir-block.return): $w0 = ORRWrs $wzr, $wzr, 0 RET undef $lr, implicit $w0 ... @@ -140,11 +140,11 @@ ; ISBDSB-NEXT: isb ; SB-NEXT: {{ sb$}} - bb.1.l2 (address-taken): + bb.1.l2 (ir-block-address-taken %ir-block.l2): renamable $w0 = MOVZWi 1, 0 RET undef $lr, implicit $w0 - bb.2.return (address-taken): + bb.2.return (ir-block-address-taken %ir-block.return): $w0 = ORRWrs $wzr, $wzr, 0 RET undef $lr, implicit $w0 ... 
diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll --- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll @@ -355,20 +355,16 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x2, x6 ; CHECK-NEXT: sbcs x9, x3, x7 -; CHECK-NEXT: cset w10, vs -; CHECK-NEXT: asr x11, x9, #63 -; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: csel x2, x11, x8, ne -; CHECK-NEXT: eor x8, x11, #0x8000000000000000 -; CHECK-NEXT: csel x3, x8, x9, ne +; CHECK-NEXT: asr x10, x9, #63 +; CHECK-NEXT: csel x2, x10, x8, vs +; CHECK-NEXT: eor x8, x10, #0x8000000000000000 +; CHECK-NEXT: csel x3, x8, x9, vs ; CHECK-NEXT: subs x8, x0, x4 ; CHECK-NEXT: sbcs x9, x1, x5 -; CHECK-NEXT: cset w10, vs -; CHECK-NEXT: asr x11, x9, #63 -; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: eor x10, x11, #0x8000000000000000 -; CHECK-NEXT: csel x8, x11, x8, ne -; CHECK-NEXT: csel x1, x10, x9, ne +; CHECK-NEXT: asr x10, x9, #63 +; CHECK-NEXT: csel x8, x10, x8, vs +; CHECK-NEXT: eor x10, x10, #0x8000000000000000 +; CHECK-NEXT: csel x1, x10, x9, vs ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll @@ -575,9 +575,7 @@ ; CHECK-NEXT: movi d0, #0000000000000000 ; CHECK-NEXT: ptrue p0.d, vl4 ; CHECK-NEXT: fcmeq v1.4h, v1.4h, #0.0 -; CHECK-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-NEXT: shl v1.2s, v1.2s, #16 -; CHECK-NEXT: sshr v1.2s, v1.2s, #16 +; CHECK-NEXT: sshll v1.4s, v1.4h, #0 ; CHECK-NEXT: fmov w8, s1 ; CHECK-NEXT: mov w9, v1.s[1] ; CHECK-NEXT: ldr q1, [x1] diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll +++ 
b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll @@ -17,15 +17,11 @@ ; CHECK-NEXT: ldr s2, [x1] ; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: fcmeq v1.4h, v1.4h, v2.4h -; CHECK-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-NEXT: shl v1.2s, v1.2s, #16 -; CHECK-NEXT: sshr v1.2s, v1.2s, #16 +; CHECK-NEXT: sshll v1.4s, v1.4h, #0 ; CHECK-NEXT: fmov w8, s1 ; CHECK-NEXT: mov w9, v1.s[1] ; CHECK-NEXT: mov v0.h[0], w8 ; CHECK-NEXT: mov v0.h[1], w9 -; CHECK-NEXT: shl v0.4h, v0.4h, #15 -; CHECK-NEXT: cmlt v0.4h, v0.4h, #0 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll @@ -538,9 +538,7 @@ ; CHECK-NEXT: ptrue p0.d, vl4 ; CHECK-NEXT: fcmeq v2.4h, v1.4h, #0.0 ; CHECK-NEXT: uunpklo z1.s, z1.h -; CHECK-NEXT: ushll v2.4s, v2.4h, #0 -; CHECK-NEXT: shl v2.2s, v2.2s, #16 -; CHECK-NEXT: sshr v2.2s, v2.2s, #16 +; CHECK-NEXT: sshll v2.4s, v2.4h, #0 ; CHECK-NEXT: fmov w8, s2 ; CHECK-NEXT: mov w9, v2.s[1] ; CHECK-NEXT: ldr q2, [x1] diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll @@ -17,9 +17,7 @@ ; CHECK-NEXT: ldr s2, [x1] ; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: fcmeq v2.4h, v1.4h, v2.4h -; CHECK-NEXT: ushll v2.4s, v2.4h, #0 -; CHECK-NEXT: shl v2.2s, v2.2s, #16 -; CHECK-NEXT: sshr v2.2s, v2.2s, #16 +; CHECK-NEXT: sshll v2.4s, v2.4h, #0 ; CHECK-NEXT: fmov w8, s2 ; CHECK-NEXT: mov w9, v2.s[1] ; CHECK-NEXT: mov v0.h[0], w8 diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll 
--- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll @@ -350,16 +350,12 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adds x8, x2, x6 ; CHECK-NEXT: adcs x9, x3, x7 -; CHECK-NEXT: cset w10, hs -; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: csinv x2, x8, xzr, eq -; CHECK-NEXT: csinv x3, x9, xzr, eq +; CHECK-NEXT: csinv x2, x8, xzr, lo +; CHECK-NEXT: csinv x3, x9, xzr, lo ; CHECK-NEXT: adds x8, x0, x4 ; CHECK-NEXT: adcs x9, x1, x5 -; CHECK-NEXT: cset w10, hs -; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: csinv x8, x8, xzr, eq -; CHECK-NEXT: csinv x1, x9, xzr, eq +; CHECK-NEXT: csinv x8, x8, xzr, lo +; CHECK-NEXT: csinv x1, x9, xzr, lo ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll --- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll @@ -346,16 +346,12 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x2, x6 ; CHECK-NEXT: sbcs x9, x3, x7 -; CHECK-NEXT: cset w10, lo -; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: csel x2, xzr, x8, ne -; CHECK-NEXT: csel x3, xzr, x9, ne +; CHECK-NEXT: csel x2, xzr, x8, lo +; CHECK-NEXT: csel x3, xzr, x9, lo ; CHECK-NEXT: subs x8, x0, x4 ; CHECK-NEXT: sbcs x9, x1, x5 -; CHECK-NEXT: cset w10, lo -; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: csel x8, xzr, x8, ne -; CHECK-NEXT: csel x1, xzr, x9, ne +; CHECK-NEXT: csel x8, xzr, x8, lo +; CHECK-NEXT: csel x1, xzr, x9, lo ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 diff --git a/llvm/test/CodeGen/AArch64/vector-fcvt.ll b/llvm/test/CodeGen/AArch64/vector-fcvt.ll --- a/llvm/test/CodeGen/AArch64/vector-fcvt.ll +++ b/llvm/test/CodeGen/AArch64/vector-fcvt.ll @@ -326,26 +326,16 @@ define <8 x double> @sitofp_i16_double(<8 x i16> %a) { ; CHECK-LABEL: sitofp_i16_double: ; CHECK: // %bb.0: -; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0 -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-NEXT: shl 
v2.2s, v1.2s, #16 -; CHECK-NEXT: shl v3.2s, v0.2s, #16 -; CHECK-NEXT: ext v1.16b, v1.16b, v1.16b, #8 -; CHECK-NEXT: sshr v2.2s, v2.2s, #16 -; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: sshr v3.2s, v3.2s, #16 -; CHECK-NEXT: sshll v2.2d, v2.2s, #0 -; CHECK-NEXT: shl v1.2s, v1.2s, #16 -; CHECK-NEXT: shl v0.2s, v0.2s, #16 -; CHECK-NEXT: scvtf v2.2d, v2.2d -; CHECK-NEXT: sshr v1.2s, v1.2s, #16 -; CHECK-NEXT: sshr v0.2s, v0.2s, #16 -; CHECK-NEXT: sshll v3.2d, v3.2s, #0 +; CHECK-NEXT: sshll2 v1.4s, v0.8h, #0 +; CHECK-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-NEXT: sshll2 v2.2d, v1.4s, #0 +; CHECK-NEXT: sshll2 v3.2d, v0.4s, #0 ; CHECK-NEXT: sshll v4.2d, v1.2s, #0 -; CHECK-NEXT: sshll v1.2d, v0.2s, #0 -; CHECK-NEXT: scvtf v0.2d, v3.2d -; CHECK-NEXT: scvtf v1.2d, v1.2d -; CHECK-NEXT: scvtf v3.2d, v4.2d +; CHECK-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-NEXT: scvtf v1.2d, v3.2d +; CHECK-NEXT: scvtf v0.2d, v0.2d +; CHECK-NEXT: scvtf v3.2d, v2.2d +; CHECK-NEXT: scvtf v2.2d, v4.2d ; CHECK-NEXT: ret %1 = sitofp <8 x i16> %a to <8 x double> ret <8 x double> %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll @@ -8,50 +8,51 @@ ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 ; CHECK-NEXT: s_or_saveexec_b32 s4, -1 -; CHECK-NEXT: buffer_store_dword v8, off, s[0:3], s32 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b32 exec_lo, s4 -; CHECK-NEXT: v_mov_b32_e32 v15, v1 -; CHECK-NEXT: v_mov_b32_e32 v14, v2 -; CHECK-NEXT: v_mov_b32_e32 v13, v3 -; CHECK-NEXT: v_mov_b32_e32 v12, v4 -; CHECK-NEXT: v_mov_b32_e32 v11, v5 -; CHECK-NEXT: v_mov_b32_e32 v10, v6 -; CHECK-NEXT: v_mov_b32_e32 v9, v7 +; CHECK-NEXT: v_mov_b32_e32 v14, v1 +; 
CHECK-NEXT: v_mov_b32_e32 v13, v2 +; CHECK-NEXT: v_mov_b32_e32 v12, v3 +; CHECK-NEXT: v_mov_b32_e32 v11, v4 +; CHECK-NEXT: v_mov_b32_e32 v10, v5 +; CHECK-NEXT: v_mov_b32_e32 v9, v6 +; CHECK-NEXT: v_mov_b32_e32 v8, v7 ; CHECK-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 killed $exec -; CHECK-NEXT: v_mov_b32_e32 v1, v15 -; CHECK-NEXT: v_mov_b32_e32 v2, v14 -; CHECK-NEXT: v_mov_b32_e32 v3, v13 -; CHECK-NEXT: v_mov_b32_e32 v4, v12 -; CHECK-NEXT: v_mov_b32_e32 v5, v11 -; CHECK-NEXT: v_mov_b32_e32 v6, v10 -; CHECK-NEXT: v_mov_b32_e32 v7, v9 -; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; CHECK-NEXT: v_mov_b32_e32 v1, v14 +; CHECK-NEXT: v_mov_b32_e32 v2, v13 +; CHECK-NEXT: v_mov_b32_e32 v3, v12 +; CHECK-NEXT: v_mov_b32_e32 v4, v11 +; CHECK-NEXT: v_mov_b32_e32 v5, v10 +; CHECK-NEXT: v_mov_b32_e32 v6, v9 +; CHECK-NEXT: v_mov_b32_e32 v7, v8 +; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; CHECK-NEXT: 
buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b32 s8, 0 ; CHECK-NEXT: s_mov_b32 s4, s8 ; CHECK-NEXT: s_mov_b32 s5, s8 ; CHECK-NEXT: s_mov_b32 s6, s8 ; CHECK-NEXT: s_mov_b32 s7, s8 -; CHECK-NEXT: v_writelane_b32 v8, s4, 0 -; CHECK-NEXT: v_writelane_b32 v8, s5, 1 -; CHECK-NEXT: v_writelane_b32 v8, s6, 2 -; CHECK-NEXT: v_writelane_b32 v8, s7, 3 +; CHECK-NEXT: v_writelane_b32 v16, s4, 0 +; CHECK-NEXT: v_writelane_b32 v16, s5, 1 +; CHECK-NEXT: v_writelane_b32 v16, s6, 2 +; CHECK-NEXT: v_writelane_b32 v16, s7, 3 ; CHECK-NEXT: s_mov_b32 s6, 0 ; CHECK-NEXT: s_mov_b32 s4, s6 ; CHECK-NEXT: s_mov_b32 s5, s6 ; CHECK-NEXT: v_mov_b32_e32 v0, s4 ; CHECK-NEXT: v_mov_b32_e32 v1, s5 -; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b32 s4, exec_lo -; CHECK-NEXT: v_writelane_b32 v8, s4, 4 +; CHECK-NEXT: v_writelane_b32 v16, s4, 4 ; CHECK-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload @@ -59,16 +60,15 @@ ; CHECK-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v15, off, s[0:3], 
s32 offset:36 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: v_mov_b32_e32 v7, v9 -; CHECK-NEXT: v_mov_b32_e32 v6, v10 -; CHECK-NEXT: v_mov_b32_e32 v5, v11 -; CHECK-NEXT: v_mov_b32_e32 v4, v12 -; CHECK-NEXT: v_mov_b32_e32 v3, v13 -; CHECK-NEXT: v_mov_b32_e32 v2, v14 -; CHECK-NEXT: v_mov_b32_e32 v1, v15 -; CHECK-NEXT: v_mov_b32_e32 v0, v16 +; CHECK-NEXT: v_mov_b32_e32 v7, v8 +; CHECK-NEXT: v_mov_b32_e32 v6, v9 +; CHECK-NEXT: v_mov_b32_e32 v5, v10 +; CHECK-NEXT: v_mov_b32_e32 v4, v11 +; CHECK-NEXT: v_mov_b32_e32 v3, v12 +; CHECK-NEXT: v_mov_b32_e32 v2, v13 +; CHECK-NEXT: v_mov_b32_e32 v1, v14 +; CHECK-NEXT: v_mov_b32_e32 v0, v15 ; CHECK-NEXT: v_readfirstlane_b32 s12, v7 ; CHECK-NEXT: v_readfirstlane_b32 s10, v6 ; CHECK-NEXT: v_readfirstlane_b32 s9, v5 @@ -85,22 +85,22 @@ ; CHECK-NEXT: s_mov_b32 s17, s6 ; CHECK-NEXT: s_mov_b32 s18, s5 ; CHECK-NEXT: s_mov_b32 s19, s4 -; CHECK-NEXT: v_writelane_b32 v8, s12, 5 -; CHECK-NEXT: v_writelane_b32 v8, s13, 6 -; CHECK-NEXT: v_writelane_b32 v8, s14, 7 -; CHECK-NEXT: v_writelane_b32 v8, s15, 8 -; CHECK-NEXT: v_writelane_b32 v8, s16, 9 -; CHECK-NEXT: v_writelane_b32 v8, s17, 10 -; CHECK-NEXT: v_writelane_b32 v8, s18, 11 -; CHECK-NEXT: v_writelane_b32 v8, s19, 12 -; CHECK-NEXT: v_mov_b32_e32 v6, v9 -; CHECK-NEXT: v_mov_b32_e32 v7, v10 -; CHECK-NEXT: v_mov_b32_e32 v4, v11 -; CHECK-NEXT: v_mov_b32_e32 v5, v12 -; CHECK-NEXT: v_mov_b32_e32 v2, v13 -; CHECK-NEXT: v_mov_b32_e32 v3, v14 -; CHECK-NEXT: v_mov_b32_e32 v0, v15 -; CHECK-NEXT: v_mov_b32_e32 v1, v16 +; CHECK-NEXT: v_writelane_b32 v16, s12, 5 +; CHECK-NEXT: v_writelane_b32 v16, s13, 6 +; CHECK-NEXT: v_writelane_b32 v16, s14, 7 +; CHECK-NEXT: v_writelane_b32 v16, s15, 8 +; CHECK-NEXT: v_writelane_b32 v16, s16, 9 +; CHECK-NEXT: v_writelane_b32 v16, s17, 10 +; CHECK-NEXT: v_writelane_b32 v16, s18, 11 +; CHECK-NEXT: v_writelane_b32 v16, s19, 12 +; CHECK-NEXT: v_mov_b32_e32 
v6, v8 +; CHECK-NEXT: v_mov_b32_e32 v7, v9 +; CHECK-NEXT: v_mov_b32_e32 v4, v10 +; CHECK-NEXT: v_mov_b32_e32 v5, v11 +; CHECK-NEXT: v_mov_b32_e32 v2, v12 +; CHECK-NEXT: v_mov_b32_e32 v3, v13 +; CHECK-NEXT: v_mov_b32_e32 v0, v14 +; CHECK-NEXT: v_mov_b32_e32 v1, v15 ; CHECK-NEXT: s_mov_b64 s[4:5], s[12:13] ; CHECK-NEXT: s_mov_b64 s[10:11], s[14:15] ; CHECK-NEXT: s_mov_b64 s[8:9], s[16:17] @@ -113,40 +113,40 @@ ; CHECK-NEXT: v_cmp_eq_u64_e64 s5, s[6:7], v[0:1] ; CHECK-NEXT: s_and_b32 s4, s4, s5 ; CHECK-NEXT: s_and_saveexec_b32 s4, s4 -; CHECK-NEXT: v_writelane_b32 v8, s4, 13 +; CHECK-NEXT: v_writelane_b32 v16, s4, 13 ; CHECK-NEXT: ; %bb.2: ; in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; CHECK-NEXT: v_readlane_b32 s4, v8, 13 -; CHECK-NEXT: v_readlane_b32 s8, v8, 5 -; CHECK-NEXT: v_readlane_b32 s9, v8, 6 -; CHECK-NEXT: v_readlane_b32 s10, v8, 7 -; CHECK-NEXT: v_readlane_b32 s11, v8, 8 -; CHECK-NEXT: v_readlane_b32 s12, v8, 9 -; CHECK-NEXT: v_readlane_b32 s13, v8, 10 -; CHECK-NEXT: v_readlane_b32 s14, v8, 11 -; CHECK-NEXT: v_readlane_b32 s15, v8, 12 -; CHECK-NEXT: v_readlane_b32 s16, v8, 0 -; CHECK-NEXT: v_readlane_b32 s17, v8, 1 -; CHECK-NEXT: v_readlane_b32 s18, v8, 2 -; CHECK-NEXT: v_readlane_b32 s19, v8, 3 +; CHECK-NEXT: v_readlane_b32 s4, v16, 13 +; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; CHECK-NEXT: v_readlane_b32 s8, v16, 5 +; CHECK-NEXT: v_readlane_b32 s9, v16, 6 +; CHECK-NEXT: v_readlane_b32 s10, v16, 7 +; CHECK-NEXT: v_readlane_b32 s11, v16, 8 +; CHECK-NEXT: v_readlane_b32 s12, v16, 9 +; CHECK-NEXT: v_readlane_b32 s13, v16, 10 +; CHECK-NEXT: v_readlane_b32 s14, v16, 11 +; CHECK-NEXT: v_readlane_b32 s15, v16, 12 +; CHECK-NEXT: v_readlane_b32 s16, v16, 0 +; CHECK-NEXT: v_readlane_b32 
s17, v16, 1 +; CHECK-NEXT: v_readlane_b32 s18, v16, 2 +; CHECK-NEXT: v_readlane_b32 s19, v16, 3 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: image_sample v0, v[0:1], s[8:15], s[16:19] dmask:0x1 dim:SQ_RSRC_IMG_2D ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; CHECK-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; CHECK-NEXT: s_cbranch_execnz .LBB0_1 ; CHECK-NEXT: ; %bb.3: -; CHECK-NEXT: v_readlane_b32 s4, v8, 4 +; CHECK-NEXT: v_readlane_b32 s4, v16, 4 ; CHECK-NEXT: s_mov_b32 exec_lo, s4 ; CHECK-NEXT: ; %bb.4: -; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload ; CHECK-NEXT: ; implicit-def: $sgpr4 ; CHECK-NEXT: v_mov_b32_e32 v1, s4 ; CHECK-NEXT: v_mov_b32_e32 v2, s4 ; CHECK-NEXT: v_mov_b32_e32 v3, s4 ; CHECK-NEXT: s_or_saveexec_b32 s4, -1 -; CHECK-NEXT: buffer_load_dword v8, off, s[0:3], s32 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b32 exec_lo, s4 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0 diff --git a/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll b/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll --- a/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll +++ b/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll @@ -32,39 +32,39 @@ ; GCN_DBG: ; %bb.0: ; %entry ; GCN_DBG-NEXT: s_load_dword s2, s[0:1], 0x9 ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0) -; GCN_DBG-NEXT: v_writelane_b32 v0, s2, 0 +; GCN_DBG-NEXT: v_writelane_b32 v2, s2, 0 ; GCN_DBG-NEXT: s_load_dword s1, s[0:1], 0xa ; GCN_DBG-NEXT: s_mov_b32 s0, 0 ; GCN_DBG-NEXT: s_mov_b32 s2, -1 ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0) ; GCN_DBG-NEXT: s_cmp_lg_u32 s1, s2 -; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 1 +; GCN_DBG-NEXT: v_writelane_b32 v2, 
s0, 1 ; GCN_DBG-NEXT: s_cbranch_scc1 .LBB0_2 ; GCN_DBG-NEXT: ; %bb.1: ; %for.exit ; GCN_DBG-NEXT: s_endpgm ; GCN_DBG-NEXT: .LBB0_2: ; %for.body ; GCN_DBG-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN_DBG-NEXT: v_readlane_b32 s0, v0, 1 -; GCN_DBG-NEXT: v_readlane_b32 s2, v0, 0 +; GCN_DBG-NEXT: v_readlane_b32 s0, v2, 1 +; GCN_DBG-NEXT: v_readlane_b32 s2, v2, 0 ; GCN_DBG-NEXT: s_mov_b32 s1, 2 ; GCN_DBG-NEXT: s_lshl_b32 s1, s0, s1 ; GCN_DBG-NEXT: s_add_i32 s1, s1, s2 ; GCN_DBG-NEXT: s_mov_b32 s2, 0x80 ; GCN_DBG-NEXT: s_add_i32 s1, s1, s2 ; GCN_DBG-NEXT: s_mov_b32 m0, -1 -; GCN_DBG-NEXT: v_mov_b32_e32 v1, s1 -; GCN_DBG-NEXT: ds_read_b32 v1, v1 +; GCN_DBG-NEXT: v_mov_b32_e32 v0, s1 +; GCN_DBG-NEXT: ds_read_b32 v0, v0 ; GCN_DBG-NEXT: s_mov_b32 s2, 1.0 ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0) -; GCN_DBG-NEXT: v_add_f32_e64 v2, v1, s2 +; GCN_DBG-NEXT: v_add_f32_e64 v1, v0, s2 ; GCN_DBG-NEXT: s_mov_b32 m0, -1 -; GCN_DBG-NEXT: v_mov_b32_e32 v1, s1 -; GCN_DBG-NEXT: ds_write_b32 v1, v2 +; GCN_DBG-NEXT: v_mov_b32_e32 v0, s1 +; GCN_DBG-NEXT: ds_write_b32 v0, v1 ; GCN_DBG-NEXT: s_mov_b32 s1, 1 ; GCN_DBG-NEXT: s_add_i32 s0, s0, s1 ; GCN_DBG-NEXT: s_mov_b64 s[2:3], -1 ; GCN_DBG-NEXT: s_and_b64 vcc, exec, s[2:3] -; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 1 +; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 1 ; GCN_DBG-NEXT: s_cbranch_vccnz .LBB0_2 ; GCN_DBG-NEXT: ; %bb.3: ; %DummyReturnBlock ; GCN_DBG-NEXT: s_endpgm @@ -107,35 +107,35 @@ ; GCN_DBG: ; %bb.0: ; %entry ; GCN_DBG-NEXT: s_load_dword s0, s[0:1], 0x9 ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0) -; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0 +; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 0 ; GCN_DBG-NEXT: s_mov_b32 s0, 0 -; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 1 +; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 1 ; GCN_DBG-NEXT: s_branch .LBB1_2 ; GCN_DBG-NEXT: .LBB1_1: ; %for.exit ; GCN_DBG-NEXT: s_endpgm ; GCN_DBG-NEXT: .LBB1_2: ; %for.body ; GCN_DBG-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN_DBG-NEXT: v_readlane_b32 s0, v0, 1 -; GCN_DBG-NEXT: 
v_readlane_b32 s2, v0, 0 +; GCN_DBG-NEXT: v_readlane_b32 s0, v2, 1 +; GCN_DBG-NEXT: v_readlane_b32 s2, v2, 0 ; GCN_DBG-NEXT: s_mov_b32 s1, 2 ; GCN_DBG-NEXT: s_lshl_b32 s1, s0, s1 ; GCN_DBG-NEXT: s_add_i32 s1, s1, s2 ; GCN_DBG-NEXT: s_mov_b32 s2, 0x80 ; GCN_DBG-NEXT: s_add_i32 s1, s1, s2 ; GCN_DBG-NEXT: s_mov_b32 m0, -1 -; GCN_DBG-NEXT: v_mov_b32_e32 v1, s1 -; GCN_DBG-NEXT: ds_read_b32 v1, v1 +; GCN_DBG-NEXT: v_mov_b32_e32 v0, s1 +; GCN_DBG-NEXT: ds_read_b32 v0, v0 ; GCN_DBG-NEXT: s_mov_b32 s2, 1.0 ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0) -; GCN_DBG-NEXT: v_add_f32_e64 v2, v1, s2 +; GCN_DBG-NEXT: v_add_f32_e64 v1, v0, s2 ; GCN_DBG-NEXT: s_mov_b32 m0, -1 -; GCN_DBG-NEXT: v_mov_b32_e32 v1, s1 -; GCN_DBG-NEXT: ds_write_b32 v1, v2 +; GCN_DBG-NEXT: v_mov_b32_e32 v0, s1 +; GCN_DBG-NEXT: ds_write_b32 v0, v1 ; GCN_DBG-NEXT: s_mov_b32 s1, 1 ; GCN_DBG-NEXT: s_add_i32 s0, s0, s1 ; GCN_DBG-NEXT: s_mov_b64 s[2:3], 0 ; GCN_DBG-NEXT: s_and_b64 vcc, exec, s[2:3] -; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 1 +; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 1 ; GCN_DBG-NEXT: s_cbranch_vccnz .LBB1_1 ; GCN_DBG-NEXT: s_branch .LBB1_2 entry: @@ -172,35 +172,35 @@ ; GCN_DBG: ; %bb.0: ; %entry ; GCN_DBG-NEXT: s_load_dword s0, s[0:1], 0x9 ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0) -; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0 +; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 0 ; GCN_DBG-NEXT: s_mov_b32 s0, 0 -; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 1 +; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 1 ; GCN_DBG-NEXT: s_branch .LBB2_2 ; GCN_DBG-NEXT: .LBB2_1: ; %for.exit ; GCN_DBG-NEXT: s_endpgm ; GCN_DBG-NEXT: .LBB2_2: ; %for.body ; GCN_DBG-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN_DBG-NEXT: v_readlane_b32 s0, v0, 1 -; GCN_DBG-NEXT: v_readlane_b32 s2, v0, 0 +; GCN_DBG-NEXT: v_readlane_b32 s0, v2, 1 +; GCN_DBG-NEXT: v_readlane_b32 s2, v2, 0 ; GCN_DBG-NEXT: s_mov_b32 s1, 2 ; GCN_DBG-NEXT: s_lshl_b32 s1, s0, s1 ; GCN_DBG-NEXT: s_add_i32 s1, s1, s2 ; GCN_DBG-NEXT: s_mov_b32 s2, 0x80 ; GCN_DBG-NEXT: s_add_i32 s1, s1, s2 ; 
GCN_DBG-NEXT: s_mov_b32 m0, -1 -; GCN_DBG-NEXT: v_mov_b32_e32 v1, s1 -; GCN_DBG-NEXT: ds_read_b32 v1, v1 +; GCN_DBG-NEXT: v_mov_b32_e32 v0, s1 +; GCN_DBG-NEXT: ds_read_b32 v0, v0 ; GCN_DBG-NEXT: s_mov_b32 s2, 1.0 ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0) -; GCN_DBG-NEXT: v_add_f32_e64 v2, v1, s2 +; GCN_DBG-NEXT: v_add_f32_e64 v1, v0, s2 ; GCN_DBG-NEXT: s_mov_b32 m0, -1 -; GCN_DBG-NEXT: v_mov_b32_e32 v1, s1 -; GCN_DBG-NEXT: ds_write_b32 v1, v2 +; GCN_DBG-NEXT: v_mov_b32_e32 v0, s1 +; GCN_DBG-NEXT: ds_write_b32 v0, v1 ; GCN_DBG-NEXT: s_mov_b32 s1, 1 ; GCN_DBG-NEXT: s_add_i32 s0, s0, s1 ; GCN_DBG-NEXT: s_mov_b64 s[2:3], -1 ; GCN_DBG-NEXT: s_and_b64 vcc, exec, s[2:3] -; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 1 +; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 1 ; GCN_DBG-NEXT: s_cbranch_vccnz .LBB2_1 ; GCN_DBG-NEXT: s_branch .LBB2_2 entry: @@ -238,33 +238,33 @@ ; GCN_DBG: ; %bb.0: ; %entry ; GCN_DBG-NEXT: s_load_dword s0, s[0:1], 0x9 ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0) -; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0 +; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 0 ; GCN_DBG-NEXT: s_mov_b32 s0, 0 -; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 1 +; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 1 ; GCN_DBG-NEXT: s_branch .LBB3_2 ; GCN_DBG-NEXT: .LBB3_1: ; %for.exit ; GCN_DBG-NEXT: s_endpgm ; GCN_DBG-NEXT: .LBB3_2: ; %for.body ; GCN_DBG-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN_DBG-NEXT: v_readlane_b32 s0, v0, 1 -; GCN_DBG-NEXT: v_readlane_b32 s2, v0, 0 +; GCN_DBG-NEXT: v_readlane_b32 s0, v2, 1 +; GCN_DBG-NEXT: v_readlane_b32 s2, v2, 0 ; GCN_DBG-NEXT: s_mov_b32 s1, 2 ; GCN_DBG-NEXT: s_lshl_b32 s1, s0, s1 ; GCN_DBG-NEXT: s_add_i32 s1, s1, s2 ; GCN_DBG-NEXT: s_mov_b32 s2, 0x80 ; GCN_DBG-NEXT: s_add_i32 s1, s1, s2 ; GCN_DBG-NEXT: s_mov_b32 m0, -1 -; GCN_DBG-NEXT: v_mov_b32_e32 v1, s1 -; GCN_DBG-NEXT: ds_read_b32 v1, v1 +; GCN_DBG-NEXT: v_mov_b32_e32 v0, s1 +; GCN_DBG-NEXT: ds_read_b32 v0, v0 ; GCN_DBG-NEXT: s_mov_b32 s2, 1.0 ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0) -; GCN_DBG-NEXT: v_add_f32_e64 v2, v1, s2 +; 
GCN_DBG-NEXT: v_add_f32_e64 v1, v0, s2 ; GCN_DBG-NEXT: s_mov_b32 m0, -1 -; GCN_DBG-NEXT: v_mov_b32_e32 v1, s1 -; GCN_DBG-NEXT: ds_write_b32 v1, v2 +; GCN_DBG-NEXT: v_mov_b32_e32 v0, s1 +; GCN_DBG-NEXT: ds_write_b32 v0, v1 ; GCN_DBG-NEXT: s_mov_b32 s1, 1 ; GCN_DBG-NEXT: s_add_i32 s0, s0, s1 -; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 1 +; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 1 ; GCN_DBG-NEXT: s_cbranch_scc1 .LBB3_1 ; GCN_DBG-NEXT: s_branch .LBB3_2 entry: @@ -316,48 +316,48 @@ ; GCN_DBG: ; %bb.0: ; %entry ; GCN_DBG-NEXT: s_load_dword s0, s[0:1], 0x9 ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0) -; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0 -; GCN_DBG-NEXT: v_mov_b32_e32 v1, 0 +; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 0 +; GCN_DBG-NEXT: v_mov_b32_e32 v0, 0 ; GCN_DBG-NEXT: s_mov_b32 m0, -1 -; GCN_DBG-NEXT: ds_read_u8 v1, v1 +; GCN_DBG-NEXT: ds_read_u8 v0, v0 ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0) -; GCN_DBG-NEXT: v_readfirstlane_b32 s0, v1 +; GCN_DBG-NEXT: v_readfirstlane_b32 s0, v0 ; GCN_DBG-NEXT: s_and_b32 s0, 1, s0 ; GCN_DBG-NEXT: s_cmp_eq_u32 s0, 1 ; GCN_DBG-NEXT: s_cselect_b64 s[0:1], -1, 0 ; GCN_DBG-NEXT: s_mov_b64 s[2:3], -1 ; GCN_DBG-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3] -; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 1 -; GCN_DBG-NEXT: v_writelane_b32 v0, s1, 2 +; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 1 +; GCN_DBG-NEXT: v_writelane_b32 v2, s1, 2 ; GCN_DBG-NEXT: s_mov_b32 s0, 0 -; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 3 +; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 3 ; GCN_DBG-NEXT: s_branch .LBB4_2 ; GCN_DBG-NEXT: .LBB4_1: ; %for.exit ; GCN_DBG-NEXT: s_endpgm ; GCN_DBG-NEXT: .LBB4_2: ; %for.body ; GCN_DBG-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN_DBG-NEXT: v_readlane_b32 s0, v0, 3 -; GCN_DBG-NEXT: v_readlane_b32 s2, v0, 1 -; GCN_DBG-NEXT: v_readlane_b32 s3, v0, 2 -; GCN_DBG-NEXT: v_readlane_b32 s4, v0, 0 +; GCN_DBG-NEXT: v_readlane_b32 s0, v2, 3 +; GCN_DBG-NEXT: v_readlane_b32 s2, v2, 1 +; GCN_DBG-NEXT: v_readlane_b32 s3, v2, 2 +; GCN_DBG-NEXT: v_readlane_b32 s4, v2, 0 ; 
GCN_DBG-NEXT: s_mov_b32 s1, 2 ; GCN_DBG-NEXT: s_lshl_b32 s1, s0, s1 ; GCN_DBG-NEXT: s_add_i32 s1, s1, s4 ; GCN_DBG-NEXT: s_mov_b32 s4, 0x80 ; GCN_DBG-NEXT: s_add_i32 s1, s1, s4 ; GCN_DBG-NEXT: s_mov_b32 m0, -1 -; GCN_DBG-NEXT: v_mov_b32_e32 v1, s1 -; GCN_DBG-NEXT: ds_read_b32 v1, v1 +; GCN_DBG-NEXT: v_mov_b32_e32 v0, s1 +; GCN_DBG-NEXT: ds_read_b32 v0, v0 ; GCN_DBG-NEXT: s_mov_b32 s4, 1.0 ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0) -; GCN_DBG-NEXT: v_add_f32_e64 v2, v1, s4 +; GCN_DBG-NEXT: v_add_f32_e64 v1, v0, s4 ; GCN_DBG-NEXT: s_mov_b32 m0, -1 -; GCN_DBG-NEXT: v_mov_b32_e32 v1, s1 -; GCN_DBG-NEXT: ds_write_b32 v1, v2 +; GCN_DBG-NEXT: v_mov_b32_e32 v0, s1 +; GCN_DBG-NEXT: ds_write_b32 v0, v1 ; GCN_DBG-NEXT: s_mov_b32 s1, 1 ; GCN_DBG-NEXT: s_add_i32 s0, s0, s1 ; GCN_DBG-NEXT: s_and_b64 vcc, exec, s[2:3] -; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 3 +; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 3 ; GCN_DBG-NEXT: s_cbranch_vccnz .LBB4_1 ; GCN_DBG-NEXT: s_branch .LBB4_2 entry: diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll --- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll +++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll @@ -420,11 +420,11 @@ ; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_0:[0-9]+]] ; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_1:[0-9]+]] ; GCN-O0: [[INNER_LOOP:.LBB[0-9]+_[0-9]+]]: -; GCN-O0: buffer_load_dword ; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_0]] ; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_1]] ; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_IN_EXEC_SPILL_LANE_0]] ; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_IN_EXEC_SPILL_LANE_1]] +; GCN-O0: buffer_load_dword ; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_LOOP_EXEC_SPILL_LANE_0:[0-9]+]] ; GCN-O0-DAG: v_writelane_b32 
[[VGPR]], s{{[0-9]+}}, [[OUTER_LOOP_EXEC_SPILL_LANE_1:[0-9]+]] ; GCN-O0: s_or_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}] diff --git a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll --- a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll @@ -48,9 +48,6 @@ ; VMEM: [[ENDIF]]: -; Restore val -; GCN: buffer_load_dword [[RELOAD_VAL:v[0-9]+]], off, s[0:3], 0 offset:[[VAL_OFFSET]] ; 4-byte Folded Reload - ; Reload and restore exec mask ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]] ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]] @@ -62,6 +59,9 @@ ; GCN: s_or_b64 exec, exec, s[[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]] +; Restore val +; GCN: buffer_load_dword [[RELOAD_VAL:v[0-9]+]], off, s[0:3], 0 offset:[[VAL_OFFSET]] ; 4-byte Folded Reload + ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RELOAD_VAL]] define amdgpu_kernel void @divergent_if_endif(i32 addrspace(1)* %out) #0 { entry: @@ -121,7 +121,6 @@ ; GCN: buffer_store_dword [[VAL_LOOP]], off, s[0:3], 0 offset:[[VAL_SUB_OFFSET:[0-9]+]] ; 4-byte Folded Spill ; GCN: [[END]]: -; GCN: buffer_load_dword v[[VAL_END:[0-9]+]], off, s[0:3], 0 offset:[[VAL_SUB_OFFSET]] ; 4-byte Folded Reload ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]] ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]] @@ -131,6 +130,7 @@ ; VMEM: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC]], 1 ; GCN: s_or_b64 exec, exec, s[[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]] +; GCN: buffer_load_dword v[[VAL_END:[0-9]+]], off, s[0:3], 0 offset:[[VAL_SUB_OFFSET]] ; 4-byte Folded Reload ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[VAL_END]] define amdgpu_kernel void @divergent_loop(i32 addrspace(1)* %out) #0 { 
@@ -187,7 +187,6 @@ ; GCN-NEXT: s_branch [[ELSE:.LBB[0-9]+_[0-9]+]] ; GCN: [[FLOW]]: ; %Flow -; GCN: buffer_load_dword [[FLOW_VAL:v[0-9]+]], off, s[0:3], 0 offset:[[FLOW_VAL_OFFSET:[0-9]+]] ; 4-byte Folded Reload ; VGPR: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]] ; VGPR: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]] @@ -199,6 +198,7 @@ ; GCN: s_or_saveexec_b64 s[[[FLOW_S_RELOAD_SAVEEXEC_LO_SAVEEXEC:[0-9]+]]:[[FLOW_S_RELOAD_SAVEEXEC_HI_SAVEEXEC:[0-9]+]]], s[[[FLOW_S_RELOAD_SAVEEXEC_LO]]:[[FLOW_S_RELOAD_SAVEEXEC_HI]]] ; Regular spill value restored after exec modification +; GCN: buffer_load_dword [[FLOW_VAL:v[0-9]+]], off, s[0:3], 0 offset:[[FLOW_VAL_OFFSET:[0-9]+]] ; 4-byte Folded Reload ; Followed by spill ; GCN: buffer_store_dword [[FLOW_VAL]], off, s[0:3], 0 offset:[[RESULT_OFFSET:[0-9]+]] ; 4-byte Folded Spill @@ -230,7 +230,6 @@ ; GCN-NEXT: s_branch [[FLOW]] ; GCN: [[ENDIF]]: -; GCN: buffer_load_dword v[[RESULT:[0-9]+]], off, s[0:3], 0 offset:[[RESULT_OFFSET]] ; 4-byte Folded Reload ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[FLOW_SAVEEXEC_LO_LANE]] ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[FLOW_SAVEEXEC_HI_LANE]] @@ -242,6 +241,7 @@ ; GCN: s_or_b64 exec, exec, s[[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]] +; GCN: buffer_load_dword v[[RESULT:[0-9]+]], off, s[0:3], 0 offset:[[RESULT_OFFSET]] ; 4-byte Folded Reload ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RESULT]] define amdgpu_kernel void @divergent_if_else_endif(i32 addrspace(1)* %out) #0 { entry: diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll --- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll @@ -13,7 +13,7 @@ ; GCN-NEXT: successors: %bb.1(0x80000000) ; GCN-NEXT: liveins: $vgpr0, $sgpr0_sgpr1 ; 
GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY killed $vgpr0 + ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.3, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) ; GCN-NEXT: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset.cast, align 4, addrspace 4) ; GCN-NEXT: renamable $sgpr6 = COPY renamable $sgpr1 ; GCN-NEXT: renamable $sgpr0 = COPY renamable $sgpr0, implicit killed $sgpr0_sgpr1 @@ -23,7 +23,7 @@ ; GCN-NEXT: renamable $sgpr1 = COPY killed renamable $sgpr6 ; GCN-NEXT: renamable $sgpr2 = COPY killed renamable $sgpr5 ; GCN-NEXT: renamable $sgpr3 = COPY killed renamable $sgpr4 - ; GCN-NEXT: SI_SPILL_S128_SAVE killed $sgpr0_sgpr1_sgpr2_sgpr3, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s128) into %stack.1, align 4, addrspace 5) + ; GCN-NEXT: SI_SPILL_S128_SAVE killed $sgpr0_sgpr1_sgpr2_sgpr3, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s128) into %stack.2, align 4, addrspace 5) ; GCN-NEXT: renamable $sgpr0 = S_MOV_B32 16 ; GCN-NEXT: renamable $sgpr1 = S_MOV_B32 15 ; GCN-NEXT: renamable $sgpr2 = S_MOV_B32 14 @@ -40,55 +40,59 @@ ; GCN-NEXT: renamable $sgpr13 = S_MOV_B32 2 ; GCN-NEXT: renamable $sgpr14 = S_MOV_B32 1 ; GCN-NEXT: renamable $sgpr15 = S_MOV_B32 0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr15 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr14 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr13 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr12 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr11 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr10 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr9 - ; GCN-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr8 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr7 - ; 
GCN-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr6 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr5 - ; GCN-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr4 - ; GCN-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr3 - ; GCN-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr2 - ; GCN-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr1 - ; GCN-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr0 - ; GCN-NEXT: undef %28.sub0:vreg_512 = COPY [[COPY1]] - ; GCN-NEXT: %28.sub1:vreg_512 = COPY [[COPY2]] - ; GCN-NEXT: %28.sub2:vreg_512 = COPY [[COPY3]] - ; GCN-NEXT: %28.sub3:vreg_512 = COPY [[COPY4]] - ; GCN-NEXT: %28.sub4:vreg_512 = COPY [[COPY5]] - ; GCN-NEXT: %28.sub5:vreg_512 = COPY [[COPY6]] - ; GCN-NEXT: %28.sub6:vreg_512 = COPY [[COPY7]] - ; GCN-NEXT: %28.sub7:vreg_512 = COPY [[COPY8]] - ; GCN-NEXT: %28.sub8:vreg_512 = COPY [[COPY9]] - ; GCN-NEXT: %28.sub9:vreg_512 = COPY [[COPY10]] - ; GCN-NEXT: %28.sub10:vreg_512 = COPY [[COPY11]] - ; GCN-NEXT: %28.sub11:vreg_512 = COPY [[COPY12]] - ; GCN-NEXT: %28.sub12:vreg_512 = COPY [[COPY13]] - ; GCN-NEXT: %28.sub13:vreg_512 = COPY [[COPY14]] - ; GCN-NEXT: %28.sub14:vreg_512 = COPY [[COPY15]] - ; GCN-NEXT: %28.sub15:vreg_512 = COPY [[COPY16]] + ; GCN-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr15 + ; GCN-NEXT: renamable $vgpr30 = COPY killed renamable $sgpr14 + ; GCN-NEXT: renamable $vgpr29 = COPY killed renamable $sgpr13 + ; GCN-NEXT: renamable $vgpr28 = COPY killed renamable $sgpr12 + ; GCN-NEXT: renamable $vgpr27 = COPY killed renamable $sgpr11 + ; GCN-NEXT: renamable $vgpr26 = COPY killed renamable $sgpr10 + ; GCN-NEXT: renamable $vgpr25 = COPY killed renamable $sgpr9 + ; GCN-NEXT: renamable $vgpr24 = COPY killed renamable $sgpr8 + ; GCN-NEXT: renamable $vgpr23 = COPY killed renamable $sgpr7 + ; GCN-NEXT: renamable $vgpr22 = COPY killed renamable $sgpr6 + ; GCN-NEXT: renamable $vgpr21 = COPY killed renamable 
$sgpr5 + ; GCN-NEXT: renamable $vgpr20 = COPY killed renamable $sgpr4 + ; GCN-NEXT: renamable $vgpr19 = COPY killed renamable $sgpr3 + ; GCN-NEXT: renamable $vgpr18 = COPY killed renamable $sgpr2 + ; GCN-NEXT: renamable $vgpr17 = COPY killed renamable $sgpr1 + ; GCN-NEXT: renamable $vgpr16 = COPY killed renamable $sgpr0 + ; GCN-NEXT: undef renamable $vgpr0 = COPY killed renamable $vgpr0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GCN-NEXT: renamable $vgpr1 = COPY killed renamable $vgpr30 + ; GCN-NEXT: renamable $vgpr2 = COPY killed renamable $vgpr29 + ; GCN-NEXT: renamable $vgpr3 = COPY killed renamable $vgpr28 + ; GCN-NEXT: renamable $vgpr4 = COPY killed renamable $vgpr27 + ; GCN-NEXT: renamable $vgpr5 = COPY killed renamable $vgpr26 + ; GCN-NEXT: renamable $vgpr6 = COPY killed renamable $vgpr25 + ; GCN-NEXT: renamable $vgpr7 = COPY killed renamable $vgpr24 + ; GCN-NEXT: renamable $vgpr8 = COPY killed renamable $vgpr23 + ; GCN-NEXT: renamable $vgpr9 = COPY killed renamable $vgpr22 + ; GCN-NEXT: renamable $vgpr10 = COPY killed renamable $vgpr21 + ; GCN-NEXT: renamable $vgpr11 = COPY killed renamable $vgpr20 + ; GCN-NEXT: renamable $vgpr12 = COPY killed renamable $vgpr19 + ; GCN-NEXT: renamable $vgpr13 = COPY killed renamable $vgpr18 + ; GCN-NEXT: renamable $vgpr14 = COPY killed renamable $vgpr17 + ; GCN-NEXT: renamable $vgpr15 = COPY killed renamable $vgpr16 + ; GCN-NEXT: SI_SPILL_V512_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, %stack.1, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.1, align 4, addrspace 5) ; GCN-NEXT: renamable $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN-NEXT: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5) - ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF ; 
GCN-NEXT: renamable $sgpr0_sgpr1 = IMPLICIT_DEF ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.2, align 4, addrspace 5) - ; GCN-NEXT: dead %45:vgpr_32 = COPY [[DEF]] - ; GCN-NEXT: renamable $sgpr2 = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec - ; GCN-NEXT: renamable $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 $sgpr2, [[COPY]](s32), implicit $exec + ; GCN-NEXT: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.4, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.4, align 4, addrspace 5) + ; GCN-NEXT: $vgpr17 = SI_SPILL_V32_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5) + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = SI_SPILL_V512_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.1, align 4, addrspace 5) + ; GCN-NEXT: $vgpr16 = SI_SPILL_V32_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) + ; GCN-NEXT: renamable $sgpr2 = V_READFIRSTLANE_B32 $vgpr16, implicit $exec + ; GCN-NEXT: renamable $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 $sgpr2, $vgpr16, implicit $exec ; GCN-NEXT: renamable $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed renamable $sgpr0_sgpr1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GCN-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V16_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V16 %28, killed $sgpr2, 11, implicit-def $m0, implicit $m0, implicit $exec - ; GCN-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[V_INDIRECT_REG_READ_GPR_IDX_B32_V16_]] + ; GCN-NEXT: renamable $vgpr0 = V_INDIRECT_REG_READ_GPR_IDX_B32_V16 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $sgpr2, 11, implicit-def $m0, implicit $m0, implicit $exec + ; GCN-NEXT: 
SI_SPILL_V32_SAVE $vgpr0, %stack.6, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5) + ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.5, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) ; GCN-NEXT: renamable $sgpr2_sgpr3 = COPY renamable $sgpr0_sgpr1 - ; GCN-NEXT: SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.2, align 4, addrspace 5) + ; GCN-NEXT: SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.4, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.4, align 4, addrspace 5) ; GCN-NEXT: $exec = S_XOR_B64_term $exec, killed renamable $sgpr0_sgpr1, implicit-def dead $scc ; GCN-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec ; GCN-NEXT: {{ $}} @@ -99,8 +103,9 @@ ; GCN-NEXT: $exec = S_MOV_B64 renamable $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = SI_SPILL_S128_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s128) from %stack.1, align 4, addrspace 5) - ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET [[V_INDIRECT_REG_READ_GPR_IDX_B32_V16_]], killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.out.load, addrspace 1) + ; GCN-NEXT: $vgpr0 = SI_SPILL_V32_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5) + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = SI_SPILL_S128_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s128) from %stack.2, align 4, addrspace 5) + ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.out.load, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 entry: %id = call i32 @llvm.amdgcn.workitem.id.x() #1 diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll --- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll +++ 
b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll @@ -227,14 +227,14 @@ ; W64-O0-DAG: s_mov_b32 [[IDX_S:s[0-9]+]], s{{[0-9]+}} ; W64-O0-DAG: v_mov_b32_e32 [[IDX_V:v[0-9]+]], s{{[0-9]+}} -; W64-O0-DAG: buffer_store_dword [[IDX_V]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[RES_OFF_TMP:[0-9]+]] ; 4-byte Folded Spill ; W64-O0-DAG: s_mov_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], exec +; W64-O0-DAG: buffer_store_dword [[IDX_V]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} ; 4-byte Folded Spill ; W64-O0: [[LOOPBB0:.LBB[0-9]+_[0-9]+]]: ; =>This Inner Loop Header: Depth=1 -; W64-O0: buffer_load_dword v[[VRSRC0:[0-9]+]], off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; W64-O0: buffer_load_dword v[[VRSRC1:[0-9]+]], off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; W64-O0: buffer_load_dword v[[VRSRC2:[0-9]+]], off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; W64-O0: buffer_load_dword v[[VRSRC3:[0-9]+]], off, s[0:3], s32 offset:44 ; 4-byte Folded Reload +; W64-O0: buffer_load_dword v[[VRSRC0:[0-9]+]], off, s[0:3], s32 offset:28 ; 4-byte Folded Reload +; W64-O0: buffer_load_dword v[[VRSRC1:[0-9]+]], off, s[0:3], s32 offset:32 ; 4-byte Folded Reload +; W64-O0: buffer_load_dword v[[VRSRC2:[0-9]+]], off, s[0:3], s32 offset:36 ; 4-byte Folded Reload +; W64-O0: buffer_load_dword v[[VRSRC3:[0-9]+]], off, s[0:3], s32 offset:40 ; 4-byte Folded Reload ; W64-O0: s_waitcnt vmcnt(0) ; W64-O0-DAG: v_readfirstlane_b32 s[[S0:[0-9]+]], v[[VRSRC0]] ; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP1:[0-9]+]], v[[VRSRC1]] @@ -251,7 +251,7 @@ ; W64-O0-DAG: s_mov_b32 s[[S2:[0-9]+]], s[[SRSRCTMP2]] ; W64-O0-DAG: s_mov_b32 s[[S3:[0-9]+]], s[[SRSRCTMP3]] ; W64-O0: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[AND]] -; W64-O0: buffer_load_dword [[IDX:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:4 ; 4-byte Folded Reload +; W64-O0: buffer_load_dword [[IDX:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s32 ; 4-byte Folded Reload ; W64-O0: buffer_load_format_x [[RES:v[0-9]+]], [[IDX]], 
s[[[S0]]:[[S3]]], {{.*}} idxen ; W64-O0: s_waitcnt vmcnt(0) ; W64-O0: buffer_store_dword [[RES]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[RES_OFF_TMP:[0-9]+]] ; 4-byte Folded Spill @@ -270,10 +270,10 @@ ; W64-O0: v_writelane_b32 [[VSAVEEXEC]], s[[SAVEEXEC1]], [[SAVEEXEC_IDX1:[0-9]+]] ; W64-O0: [[LOOPBB1:.LBB[0-9]+_[0-9]+]]: ; =>This Inner Loop Header: Depth=1 -; W64-O0: buffer_load_dword v[[VRSRC0:[0-9]+]], off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; W64-O0: buffer_load_dword v[[VRSRC1:[0-9]+]], off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; W64-O0: buffer_load_dword v[[VRSRC2:[0-9]+]], off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; W64-O0: buffer_load_dword v[[VRSRC3:[0-9]+]], off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; W64-O0: buffer_load_dword v[[VRSRC0:[0-9]+]], off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; W64-O0: buffer_load_dword v[[VRSRC1:[0-9]+]], off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; W64-O0: buffer_load_dword v[[VRSRC2:[0-9]+]], off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; W64-O0: buffer_load_dword v[[VRSRC3:[0-9]+]], off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; W64-O0: s_waitcnt vmcnt(0) ; W64-O0-DAG: v_readfirstlane_b32 s[[S0:[0-9]+]], v[[VRSRC0]] ; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP1:[0-9]+]], v[[VRSRC1]] @@ -297,10 +297,10 @@ ; W64-O0: s_xor_b64 exec, exec, [[SAVE]] ; W64-O0-NEXT: s_cbranch_execnz [[LOOPBB1]] -; W64-O0: buffer_load_dword [[RES:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[RES_OFF_TMP]] ; 4-byte Folded Reload ; W64-O0: v_readlane_b32 s[[SAVEEXEC0:[0-9]+]], [[VSAVEEXEC]], [[SAVEEXEC_IDX0]] ; W64-O0: v_readlane_b32 s[[SAVEEXEC1:[0-9]+]], [[VSAVEEXEC]], [[SAVEEXEC_IDX1]] ; W64-O0: s_mov_b64 exec, s[[[SAVEEXEC0]]:[[SAVEEXEC1]]] +; W64-O0: buffer_load_dword [[RES:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[RES_OFF_TMP]] ; 4-byte Folded Reload ; W64-O0: buffer_store_dword [[RES]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} 
offset:[[RES_OFF]] ; 4-byte Folded Spill ; W64-O0: [[TERMBB]]: diff --git a/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll b/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll --- a/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll +++ b/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll @@ -19,10 +19,10 @@ ; CHECK-NEXT: v_writelane_b32 v40, s33, 2 ; CHECK-NEXT: s_mov_b32 s33, s32 ; CHECK-NEXT: s_add_i32 s32, s32, 0x400 -; CHECK-NEXT: v_writelane_b32 v40, s30, 0 -; CHECK-NEXT: v_writelane_b32 v40, s31, 1 ; CHECK-NEXT: .Ltmp0: ; CHECK-NEXT: .loc 0 31 3 prologue_end ; lane-info.cpp:31:3 +; CHECK-NEXT: v_writelane_b32 v40, s30, 0 +; CHECK-NEXT: v_writelane_b32 v40, s31, 1 ; CHECK-NEXT: s_getpc_b64 s[16:17] ; CHECK-NEXT: s_add_u32 s16, s16, _ZL13sleep_foreverv@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, _ZL13sleep_foreverv@gotpcrel32@hi+12 diff --git a/llvm/test/CodeGen/AMDGPU/remat-dead-subreg.mir b/llvm/test/CodeGen/AMDGPU/remat-dead-subreg.mir --- a/llvm/test/CodeGen/AMDGPU/remat-dead-subreg.mir +++ b/llvm/test/CodeGen/AMDGPU/remat-dead-subreg.mir @@ -79,3 +79,28 @@ %6:vreg_64 = V_MOV_B64_PSEUDO %2, implicit $exec S_NOP 0, implicit %1.sub0, implicit %1.sub3 ... 
+--- +name: dead_subreg_whole_reg +tracksRegLiveness: true +body: | + bb.0.entry: + ; GCN-LABEL: name: dead_subreg_whole_reg + ; GCN: $m0 = IMPLICIT_DEF + ; GCN-NEXT: renamable $sgpr0_sgpr1 = S_MOV_B64 1, implicit $m0 + ; GCN-NEXT: renamable $sgpr2_sgpr3 = S_MOV_B64 renamable $sgpr0_sgpr1 + ; GCN-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr2_sgpr3, %stack.0, implicit $exec, implicit $sp_reg :: (store (s64) into %stack.0, align 4, addrspace 5) + ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_MOV_B64 2, implicit $m0 + ; GCN-NEXT: renamable $sgpr2_sgpr3 = S_MOV_B64 3, implicit $m0 + ; GCN-NEXT: dead %4:vgpr_32 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit killed $sgpr4_sgpr5, implicit killed $sgpr2_sgpr3 + ; GCN-NEXT: renamable $sgpr2_sgpr3 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s64) from %stack.0, align 4, addrspace 5) + ; GCN-NEXT: dead %5:vreg_64 = V_MOV_B64_PSEUDO killed $sgpr2_sgpr3, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0 + $m0 = IMPLICIT_DEF + %0:sreg_64_xexec = S_MOV_B64 1, implicit $m0 + %1:sreg_64 = S_MOV_B64 %0:sreg_64_xexec + %2:sreg_64 = S_MOV_B64 2, implicit $m0 + %3:sreg_64 = S_MOV_B64 3, implicit $m0 + %4:vgpr_32 = V_MOV_B32_e32 %0.sub0:sreg_64_xexec, implicit $exec, implicit %2, implicit %3 + %5:vreg_64 = V_MOV_B64_PSEUDO %1, implicit $exec + S_NOP 0, implicit %0.sub0 +... diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll --- a/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll @@ -191,23 +191,23 @@ ; we have no VGPR to allocate for SGPR spills. We are forced to spill to memory. 
; GCN-LABEL: {{^}}spill_sgpr_no_free_vgpr: -; GCN: v_writelane_b32 v{{[0-9]+}}, s34, 0 -; GCN: v_writelane_b32 v{{[0-9]+}}, s35, 1 -; GCN: v_writelane_b32 v{{[0-9]+}}, s36, 2 -; GCN: v_writelane_b32 v{{[0-9]+}}, s37, 3 -; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32 -; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32 -; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32 -; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32 +; GCN: v_writelane_b32 [[A:v[0-9]+]], s34, 0 +; GCN: buffer_store_dword [[A]], off, s[0:3], s32 +; GCN: v_writelane_b32 [[B:v[0-9]+]], s35, 0 +; GCN: buffer_store_dword [[B]], off, s[0:3], s32 +; GCN: v_writelane_b32 [[C:v[0-9]+]], s36, 0 +; GCN: buffer_store_dword [[C]], off, s[0:3], s32 +; GCN: v_writelane_b32 [[D:v[0-9]+]], s37, 0 +; GCN: buffer_store_dword [[D]], off, s[0:3], s32 ; GCN: #ASMEND -; GCN: buffer_load_dword v{{[0-9]+}} -; GCN: buffer_load_dword v{{[0-9]+}} -; GCN: buffer_load_dword v{{[0-9]+}} -; GCN: buffer_load_dword v{{[0-9]+}} -; GCN: v_readlane_b32 s37, v{{[0-9]+}}, 3 -; GCN: v_readlane_b32 s36, v{{[0-9]+}}, 2 -; GCN: v_readlane_b32 s35, v{{[0-9]+}}, 1 -; GCN: v_readlane_b32 s34, v{{[0-9]+}}, 0 +; GCN: buffer_load_dword [[E:v[0-9]+]] +; GCN: v_readlane_b32 s37, [[E]], 0 +; GCN: buffer_load_dword [[F:v[0-9]+]] +; GCN: v_readlane_b32 s36, [[F]], 0 +; GCN: buffer_load_dword [[G:v[0-9]+]] +; GCN: v_readlane_b32 s35, [[G]], 0 +; GCN: buffer_load_dword [[H:v[0-9]+]] +; GCN: v_readlane_b32 s34, [[H]], 0 define void @spill_sgpr_no_free_vgpr(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 { %a = load <4 x i32>, <4 x i32> addrspace(1)* %in diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll --- a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll @@ -133,7 +133,7 @@ ; GFX9-O0: ; %bb.0: ; %entry ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] @@ -144,18 +144,18 @@ ; GFX9-O0-NEXT: s_mov_b32 s39, s7 ; GFX9-O0-NEXT: s_mov_b64 s[42:43], s[38:39] ; GFX9-O0-NEXT: s_mov_b64 s[40:41], s[36:37] -; GFX9-O0-NEXT: v_writelane_b32 v3, s40, 0 -; GFX9-O0-NEXT: v_writelane_b32 v3, s41, 1 -; GFX9-O0-NEXT: v_writelane_b32 v3, s42, 2 -; GFX9-O0-NEXT: v_writelane_b32 v3, s43, 3 +; GFX9-O0-NEXT: v_writelane_b32 v5, s40, 0 +; GFX9-O0-NEXT: v_writelane_b32 v5, s41, 1 +; GFX9-O0-NEXT: v_writelane_b32 v5, s42, 2 +; GFX9-O0-NEXT: v_writelane_b32 v5, s43, 3 ; GFX9-O0-NEXT: s_mov_b32 s34, 0 -; GFX9-O0-NEXT: buffer_load_dwordx2 v[4:5], off, s[36:39], s34 +; GFX9-O0-NEXT: buffer_load_dwordx2 v[3:4], off, s[36:39], s34 ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; GFX9-O0-NEXT: ; implicit-def: $sgpr36_sgpr37 -; GFX9-O0-NEXT: v_mov_b32_e32 v1, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 ; GFX9-O0-NEXT: s_not_b64 exec, exec ; GFX9-O0-NEXT: v_mov_b32_e32 v1, s34 ; GFX9-O0-NEXT: s_not_b64 exec, exec @@ -165,23 +165,23 @@ ; GFX9-O0-NEXT: v_mov_b32_dpp v2, v1 row_bcast:31 row_mask:0xc bank_mask:0xf ; GFX9-O0-NEXT: v_add_u32_e64 v1, v1, v2 ; GFX9-O0-NEXT: s_mov_b64 exec, s[36:37] -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1 -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:8 ; 4-byte 
Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[36:37], v0, s34 ; GFX9-O0-NEXT: v_mov_b32_e32 v0, s34 -; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 s[34:35], exec -; GFX9-O0-NEXT: v_writelane_b32 v3, s34, 4 -; GFX9-O0-NEXT: v_writelane_b32 v3, s35, 5 +; GFX9-O0-NEXT: v_writelane_b32 v5, s34, 4 +; GFX9-O0-NEXT: v_writelane_b32 v5, s35, 5 ; GFX9-O0-NEXT: s_and_b64 s[34:35], s[34:35], s[36:37] ; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O0-NEXT: s_cbranch_execz .LBB1_2 ; GFX9-O0-NEXT: ; %bb.1: ; %if -; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_mov_b32_e32 v0, v5 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v4 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] @@ -194,20 +194,19 @@ ; GFX9-O0-NEXT: v_add_u32_e64 v1, v2, v1 ; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1 -; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O0-NEXT: .LBB1_2: ; %merge -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GFX9-O0-NEXT: v_readlane_b32 s34, v3, 4 -; GFX9-O0-NEXT: 
v_readlane_b32 s35, v3, 5 +; GFX9-O0-NEXT: v_readlane_b32 s34, v5, 4 +; GFX9-O0-NEXT: v_readlane_b32 s35, v5, 5 ; GFX9-O0-NEXT: s_or_b64 exec, exec, s[34:35] -; GFX9-O0-NEXT: v_readlane_b32 s36, v3, 0 -; GFX9-O0-NEXT: v_readlane_b32 s37, v3, 1 -; GFX9-O0-NEXT: v_readlane_b32 s38, v3, 2 -; GFX9-O0-NEXT: v_readlane_b32 s39, v3, 3 +; GFX9-O0-NEXT: v_readlane_b32 s36, v5, 0 +; GFX9-O0-NEXT: v_readlane_b32 s37, v5, 1 +; GFX9-O0-NEXT: v_readlane_b32 s38, v5, 2 +; GFX9-O0-NEXT: v_readlane_b32 s39, v5, 3 +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[34:35], v0, v4 +; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[34:35], v0, v3 ; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35] ; GFX9-O0-NEXT: s_mov_b32 s34, 1 ; GFX9-O0-NEXT: v_lshlrev_b32_e64 v0, s34, v0 @@ -216,7 +215,7 @@ ; GFX9-O0-NEXT: s_mov_b32 s34, 0 ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[36:39], s34 offset:4 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] diff --git a/llvm/test/CodeGen/ARM/ifcvt-diamond-unanalyzable-common.mir b/llvm/test/CodeGen/ARM/ifcvt-diamond-unanalyzable-common.mir --- a/llvm/test/CodeGen/ARM/ifcvt-diamond-unanalyzable-common.mir +++ b/llvm/test/CodeGen/ARM/ifcvt-diamond-unanalyzable-common.mir @@ -6,9 +6,11 @@ --- | target triple = "thumbv7-unknown-linux-gnueabi" - define dso_local void @fn1() { + define dso_local i8* @fn1() { + entry: + br label %l_yes l_yes: - ret void + ret i8* blockaddress(@fn1, %l_yes) } declare dso_local i32 
@fn2(...) @@ -20,22 +22,25 @@ body: | ; CHECK-LABEL: name: fn1 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $r0, $r1, $r2, $r4, $lr - ; CHECK: $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r4, killed $lr - ; CHECK: t2CMPri killed renamable $r2, 34, 14 /* CC::al */, $noreg, implicit-def $cpsr - ; CHECK: $r0 = t2MOVi 2, 1 /* CC::ne */, $cpsr, $noreg - ; CHECK: $r0 = t2MOVi 3, 0 /* CC::eq */, killed $cpsr, $noreg, implicit killed $r0 - ; CHECK: tBL 14 /* CC::al */, $noreg, @fn2, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit killed $r1, implicit-def $sp, implicit-def dead $r0 - ; CHECK: t2CMPri $sp, 34, 14 /* CC::al */, $noreg, implicit-def $cpsr - ; CHECK: t2Bcc %bb.2, 1 /* CC::ne */, $cpsr - ; CHECK: t2Bcc %bb.2, 2 /* CC::hs */, killed $cpsr - ; CHECK: t2B %bb.1, 14 /* CC::al */, $noreg - ; CHECK: bb.1: - ; CHECK: INLINEASM &"", 1 - ; CHECK: $sp = t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r4, def $pc - ; CHECK: bb.2.l_yes (address-taken): - ; CHECK: $sp = t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r4, def $pc + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $r0, $r1, $r2, $r4, $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r4, killed $lr + ; CHECK-NEXT: t2CMPri killed renamable $r2, 34, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK-NEXT: $r0 = t2MOVi 2, 1 /* CC::ne */, $cpsr, $noreg + ; CHECK-NEXT: $r0 = t2MOVi 3, 0 /* CC::eq */, killed $cpsr, $noreg, implicit killed $r0 + ; CHECK-NEXT: tBL 14 /* CC::al */, $noreg, @fn2, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit killed $r1, implicit-def $sp, implicit-def dead $r0 + ; CHECK-NEXT: t2CMPri $sp, 34, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK-NEXT: t2Bcc %bb.2, 1 /* CC::ne */, $cpsr + ; CHECK-NEXT: t2Bcc %bb.2, 2 /* CC::hs */, killed $cpsr + ; 
CHECK-NEXT: t2B %bb.1, 14 /* CC::al */, $noreg + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ + ; CHECK-NEXT: $sp = t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r4, def $pc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.l_yes (ir-block-address-taken %ir-block.l_yes): + ; CHECK-NEXT: $sp = t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r4, def $pc bb.0: successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $r0, $r1, $r2, $r4, $lr @@ -70,7 +75,7 @@ INLINEASM &"", 1 $sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $pc - bb.4.l_yes (address-taken): + bb.4.l_yes (ir-block-address-taken %ir-block.l_yes): $sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $pc ... diff --git a/llvm/test/CodeGen/ARM/ifcvt-size.mir b/llvm/test/CodeGen/ARM/ifcvt-size.mir --- a/llvm/test/CodeGen/ARM/ifcvt-size.mir +++ b/llvm/test/CodeGen/ARM/ifcvt-size.mir @@ -553,7 +553,7 @@ INLINEASM_BR &"b ${0:l}", 1, 13, blockaddress(@fn9, %ir-block.lab1) tBX_RET 14, $noreg, implicit $r2 - bb.5.lab1 (address-taken): + bb.5.lab1 (ir-block-address-taken %ir-block.lab1): liveins: $r0 renamable $r0, dead $cpsr = nsw tADDi8 killed renamable $r0, 5, 14, $noreg diff --git a/llvm/test/CodeGen/ARM/vector-store.ll b/llvm/test/CodeGen/ARM/vector-store.ll --- a/llvm/test/CodeGen/ARM/vector-store.ll +++ b/llvm/test/CodeGen/ARM/vector-store.ll @@ -419,3 +419,20 @@ store <3 x i8> zeroinitializer, <3 x i8> *%p, align 4 ret void } + +define void @v3i64shuffle(<3 x i64> *%p, <3 x i64> %a) { +; CHECK-LABEL: v3i64shuffle: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 q8, #0x0 +; CHECK-NEXT: ldrd r12, r1, [sp, #8] +; CHECK-NEXT: vmov d18, r2, r3 +; CHECK-NEXT: vorr d19, d16, d16 +; CHECK-NEXT: str r1, [r0, #20] +; CHECK-NEXT: vst1.32 {d18, d19}, [r0]! 
+; CHECK-NEXT: str.w r12, [r0] +; CHECK-NEXT: bx lr + %b = shufflevector <3 x i64> %a, <3 x i64> zeroinitializer, <3 x i32> + store <3 x i64> %b, <3 x i64> *%p, align 4 + ret void +} + diff --git a/llvm/test/CodeGen/AVR/interrupts.ll b/llvm/test/CodeGen/AVR/interrupts.ll --- a/llvm/test/CodeGen/AVR/interrupts.ll +++ b/llvm/test/CodeGen/AVR/interrupts.ll @@ -1,18 +1,16 @@ ; RUN: llc < %s -march=avr | FileCheck %s @count = global i8 0 +@funcptr = global void () addrspace(1)* null define avr_intrcc void @interrupt_handler() { ; CHECK-LABEL: interrupt_handler: ; CHECK: sei ; CHECK-NEXT: push r0 -; CHECK-NEXT: push r1 ; CHECK-NEXT: in r0, 63 ; CHECK-NEXT: push r0 -; CHECK-NEXT: clr r1 ; CHECK: pop r0 ; CHECK-NEXT: out 63, r0 -; CHECK-NEXT: pop r1 ; CHECK-NEXT: pop r0 ; CHECK-NEXT: reti ret void @@ -22,13 +20,10 @@ ; CHECK-LABEL: interrupt_handler_via_ir_attribute: ; CHECK: sei ; CHECK-NEXT: push r0 -; CHECK-NEXT: push r1 ; CHECK-NEXT: in r0, 63 ; CHECK-NEXT: push r0 -; CHECK-NEXT: clr r1 ; CHECK: pop r0 ; CHECK-NEXT: out 63, r0 -; CHECK-NEXT: pop r1 ; CHECK-NEXT: pop r0 ; CHECK-NEXT: reti ret void @@ -38,13 +33,10 @@ ; CHECK-LABEL: signal_handler: ; CHECK-NOT: sei ; CHECK: push r0 -; CHECK-NEXT: push r1 ; CHECK-NEXT: in r0, 63 ; CHECK-NEXT: push r0 -; CHECK-NEXT: clr r1 ; CHECK: pop r0 ; CHECK-NEXT: out 63, r0 -; CHECK-NEXT: pop r1 ; CHECK-NEXT: pop r0 ; CHECK-NEXT: reti ret void @@ -54,13 +46,10 @@ ; CHECK-LABEL: signal_handler_via_attribute: ; CHECK-NOT: sei ; CHECK: push r0 -; CHECK-NEXT: push r1 ; CHECK-NEXT: in r0, 63 ; CHECK-NEXT: push r0 -; CHECK-NEXT: clr r1 ; CHECK: pop r0 ; CHECK-NEXT: out 63, r0 -; CHECK-NEXT: pop r1 ; CHECK-NEXT: pop r0 ; CHECK-NEXT: reti ret void @@ -70,10 +59,8 @@ ; CHECK-LABEL: interrupt_alloca: ; CHECK: sei ; CHECK-NEXT: push r0 -; CHECK-NEXT: push r1 ; CHECK-NEXT: in r0, 63 ; CHECK-NEXT: push r0 -; CHECK-NEXT: clr r1 ; CHECK: push r28 ; CHECK-NEXT: push r29 ; CHECK-NEXT: in r28, 61 @@ -94,7 +81,6 @@ ; CHECK-NEXT: pop r28 ; CHECK: pop r0 
; CHECK-NEXT: out 63, r0 -; CHECK-NEXT: pop r1 ; CHECK-NEXT: pop r0 ; CHECK-NEXT: reti alloca i8 @@ -104,10 +90,8 @@ define void @signal_handler_with_increment() #1 { ; CHECK-LABEL: signal_handler_with_increment: ; CHECK: push r0 -; CHECK-NEXT: push r1 ; CHECK-NEXT: in r0, 63 ; CHECK-NEXT: push r0 -; CHECK-NEXT: clr r1 ; CHECK-NEXT: push r24 ; CHECK-NEXT: lds r24, count ; CHECK-NEXT: inc r24 @@ -115,7 +99,6 @@ ; CHECK-NEXT: pop r24 ; CHECK-NEXT: pop r0 ; CHECK-NEXT: out 63, r0 -; CHECK-NEXT: pop r1 ; CHECK-NEXT: pop r0 ; CHECK-NEXT: reti %old = load volatile i8, i8* @count @@ -124,6 +107,29 @@ ret void } +; Check that r1 is saved/restored and set to 0 when using inline assembly. +define void @signal_handler_with_asm() #1 { +; CHECK-LABEL: signal_handler_with_asm: +; CHECK: push r0 +; CHECK-NEXT: in r0, 63 +; CHECK-NEXT: push r0 +; CHECK-NEXT: push r1 +; CHECK-NEXT: clr r1 +; CHECK-NEXT: push r24 +; CHECK-NEXT: ldi +; ;APP +; CHECK: mov +; ;NO_APP +; CHECK: pop r24 +; CHECK-NEXT: pop r1 +; CHECK-NEXT: pop r0 +; CHECK-NEXT: out 63, r0 +; CHECK-NEXT: pop r0 +; CHECK-NEXT: reti + call i8 asm sideeffect "mov $0, $1", "=r,r"(i8 3) nounwind + ret void +} + declare void @foo() ; When a signal handler calls a function, it must push/pop all call clobbered @@ -131,9 +137,9 @@ define void @signal_handler_with_call() #1 { ; CHECK-LABEL: signal_handler_with_call: ; CHECK: push r0 -; CHECK-NEXT: push r1 ; CHECK-NEXT: in r0, 63 ; CHECK-NEXT: push r0 +; CHECK-NEXT: push r1 ; CHECK-NEXT: clr r1 ; CHECK-NEXT: push r18 ; CHECK-NEXT: push r19 @@ -160,14 +166,58 @@ ; CHECK-NEXT: pop r20 ; CHECK-NEXT: pop r19 ; CHECK-NEXT: pop r18 +; CHECK-NEXT: pop r1 ; CHECK-NEXT: pop r0 ; CHECK-NEXT: out 63, r0 -; CHECK-NEXT: pop r1 ; CHECK-NEXT: pop r0 ; CHECK-NEXT: reti call void @foo() ret void } +define void @signal_handler_with_icall() #1 { +; CHECK-LABEL: signal_handler_with_icall: +; CHECK: push r0 +; CHECK-NEXT: in r0, 63 +; CHECK-NEXT: push r0 +; CHECK-NEXT: push r1 +; CHECK-NEXT: clr r1 +; 
CHECK-NEXT: push r18 +; CHECK-NEXT: push r19 +; CHECK-NEXT: push r20 +; CHECK-NEXT: push r21 +; CHECK-NEXT: push r22 +; CHECK-NEXT: push r23 +; CHECK-NEXT: push r24 +; CHECK-NEXT: push r25 +; CHECK-NEXT: push r26 +; CHECK-NEXT: push r27 +; CHECK-NEXT: push r30 +; CHECK-NEXT: push r31 +; CHECK-NEXT: lds r30, funcptr +; CHECK-NEXT: lds r31, funcptr+1 +; CHECK-NEXT: icall +; CHECK-NEXT: pop r31 +; CHECK-NEXT: pop r30 +; CHECK-NEXT: pop r27 +; CHECK-NEXT: pop r26 +; CHECK-NEXT: pop r25 +; CHECK-NEXT: pop r24 +; CHECK-NEXT: pop r23 +; CHECK-NEXT: pop r22 +; CHECK-NEXT: pop r21 +; CHECK-NEXT: pop r20 +; CHECK-NEXT: pop r19 +; CHECK-NEXT: pop r18 +; CHECK-NEXT: pop r1 +; CHECK-NEXT: pop r0 +; CHECK-NEXT: out 63, r0 +; CHECK-NEXT: pop r0 +; CHECK-NEXT: reti + %ptr = load volatile void() addrspace(1)*, void() addrspace(1)** @funcptr + call void %ptr() + ret void +} + attributes #0 = { "interrupt" } attributes #1 = { "signal" } diff --git a/llvm/test/CodeGen/AVR/pseudo/LDWRdPtr-same-src-dst.mir b/llvm/test/CodeGen/AVR/pseudo/LDWRdPtr-same-src-dst.mir deleted file mode 100644 --- a/llvm/test/CodeGen/AVR/pseudo/LDWRdPtr-same-src-dst.mir +++ /dev/null @@ -1,30 +0,0 @@ -# RUN: llc -O0 %s -o - | FileCheck %s - -# This test checks the expansion of the 16-bit LDWRdPtr pseudo instruction. - ---- | - target triple = "avr--" - define void @test_ldwrdptr() { - entry: - ret void - } -... - ---- -name: test_ldwrdptr -tracksRegLiveness: true -body: | - bb.0.entry: - liveins: $r31r30 - - ; CHECK-LABEL: test_ldwrdptr - - ; CHECK: ld [[SCRATCH:r[0-9]+]], Z - ; CHECK-NEXT: push [[SCRATCH]] - ; CHECK-NEXT: ldd [[SCRATCH]], Z+1 - ; CHECK-NEXT: mov r31, [[SCRATCH]] - ; CHECK-NEXT: pop r30 - - early-clobber $r31r30 = LDWRdPtr undef $r31r30 -... 
- diff --git a/llvm/test/CodeGen/AVR/pseudo/NEGWRd.mir b/llvm/test/CodeGen/AVR/pseudo/NEGWRd.mir --- a/llvm/test/CodeGen/AVR/pseudo/NEGWRd.mir +++ b/llvm/test/CodeGen/AVR/pseudo/NEGWRd.mir @@ -22,5 +22,5 @@ ; CHECK-NEXT: $r14 = NEGRd $r14 ; CHECK-NEXT: $r15 = SBCRdRr $r15, $r1, implicit-def $sreg, implicit killed $sreg - $r15r14 = NEGWRd $r15r14, implicit-def $sreg + $r15r14 = NEGWRd $r15r14, implicit-def $sreg, implicit $r1 ... diff --git a/llvm/test/CodeGen/AVR/pseudo/ROLBrd.mir b/llvm/test/CodeGen/AVR/pseudo/ROLBrd.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AVR/pseudo/ROLBrd.mir @@ -0,0 +1,25 @@ +# RUN: llc -O0 -run-pass=avr-expand-pseudo %s -o - | FileCheck %s + +# This test checks the expansion of the 8-bit ROLB (rotate) pseudo instruction. + +--- | + target triple = "avr--" + define void @test_rolbrd() { + entry: + ret void + } +... + +--- +name: test_rolbrd +body: | + bb.0.entry: + liveins: $r14 + + ; CHECK-LABEL: test_rolbrd + + ; CHECK: $r14 = ADDRdRr killed $r14, killed $r14, implicit-def $sreg + ; CHECK-NEXT: $r14 = ADCRdRr $r14, $r1, implicit-def dead $sreg, implicit killed $sreg + + $r14 = ROLBRd $r14, implicit-def $sreg, implicit $r1 +... diff --git a/llvm/test/CodeGen/AVR/unaligned-atomic-loads.ll b/llvm/test/CodeGen/AVR/unaligned-atomic-ops.ll rename from llvm/test/CodeGen/AVR/unaligned-atomic-loads.ll rename to llvm/test/CodeGen/AVR/unaligned-atomic-ops.ll --- a/llvm/test/CodeGen/AVR/unaligned-atomic-loads.ll +++ b/llvm/test/CodeGen/AVR/unaligned-atomic-ops.ll @@ -1,6 +1,6 @@ ; RUN: llc -mattr=addsubiw < %s -march=avr | FileCheck %s -; This verifies that the middle end can handle an unaligned atomic load. +; This verifies that the backend can handle an unaligned atomic load and store. ; ; In the past, an assertion inside the SelectionDAGBuilder would always ; hit an assertion for unaligned loads and stores. 
@@ -14,6 +14,7 @@ start: %a = getelementptr inbounds %AtomicI16, %AtomicI16* %self, i16 0, i32 0, i32 0 load atomic i16, i16* %a seq_cst, align 1 + store atomic i16 5, i16* %a seq_cst, align 1 ret void } diff --git a/llvm/test/CodeGen/AVR/zeroreg.ll b/llvm/test/CodeGen/AVR/zeroreg.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AVR/zeroreg.ll @@ -0,0 +1,27 @@ +; RUN: llc -mattr=avr6,sram < %s -march=avr | FileCheck %s + +; This file tests whether the compiler correctly works with the r1 register, +; clearing it when needed. + +; Test regular use of r1 as a zero register. +; CHECK-LABEL: store8zero: +; CHECK: st {{[XYZ]}}, r1 +; CHECK-NEXT: mov r24, r1 +; CHECK-NEXT: ret +define i8 @store8zero(i8* %x) { + store i8 0, i8* %x + ret i8 0 +} + +; Test that mulitplication instructions (mul, muls, etc) clobber r1 and require +; a "clr r1" instruction. +; CHECK-LABEL: mul: +; CHECK: muls +; CHECK-NEXT: clr r1 +; CHECK-NEXT: st {{[XYZ]}}, r0 +; CHECK-NEXT: ret +define void @mul(i8* %ptr, i8 %n) { + %result = mul i8 %n, 3 + store i8 %result, i8* %ptr + ret void +} diff --git a/llvm/test/CodeGen/Hexagon/bank-conflict.mir b/llvm/test/CodeGen/Hexagon/bank-conflict.mir --- a/llvm/test/CodeGen/Hexagon/bank-conflict.mir +++ b/llvm/test/CodeGen/Hexagon/bank-conflict.mir @@ -107,7 +107,7 @@ $r4 = A2_tfrsi 10 J2_loop0r %bb.1, killed $r0, implicit-def $lc0, implicit-def $sa0, implicit-def $usr - bb.1 (address-taken): + bb.1 (machine-block-address-taken): successors: %bb.2(0x80000000) liveins: $lc0:0x00000004, $r2:0x00000001, $r3:0x00000001, $r4:0x00000001, $sa0:0x00000004 diff --git a/llvm/test/CodeGen/Hexagon/hwloop-redef-imm.mir b/llvm/test/CodeGen/Hexagon/hwloop-redef-imm.mir --- a/llvm/test/CodeGen/Hexagon/hwloop-redef-imm.mir +++ b/llvm/test/CodeGen/Hexagon/hwloop-redef-imm.mir @@ -10,7 +10,7 @@ # CHECK: [[R0:%[0-9]+]]:intregs = A2_tfrsi 1920 # CHECK: J2_loop0r %bb.1, [[R0]] # -# CHECK: bb.1.b1 (address-taken): +# CHECK: bb.1.b1 (machine-block-address-taken): # 
CHECK: ENDLOOP0 %bb.1 diff --git a/llvm/test/CodeGen/Hexagon/loop_correctness.ll b/llvm/test/CodeGen/Hexagon/loop_correctness.ll --- a/llvm/test/CodeGen/Hexagon/loop_correctness.ll +++ b/llvm/test/CodeGen/Hexagon/loop_correctness.ll @@ -8,8 +8,8 @@ ; CHECK-NEXT: loop0(.LBB0_1,#3) ; CHECK-NEXT: } ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .Ltmp0: // Block address taken -; CHECK-NEXT: .LBB0_1: // %b2 +; CHECK-NEXT: .LBB0_1: // Block address taken +; CHECK-NEXT: // %b2 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: { ; CHECK-NEXT: nop @@ -42,8 +42,8 @@ ; CHECK-NEXT: loop0(.LBB1_1,#2) ; CHECK-NEXT: } ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .Ltmp1: // Block address taken -; CHECK-NEXT: .LBB1_1: // %b2 +; CHECK-NEXT: .LBB1_1: // Block address taken +; CHECK-NEXT: // %b2 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: { ; CHECK-NEXT: nop @@ -76,8 +76,8 @@ ; CHECK-NEXT: loop0(.LBB2_1,#1) ; CHECK-NEXT: } ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .Ltmp2: // Block address taken -; CHECK-NEXT: .LBB2_1: // %b2 +; CHECK-NEXT: .LBB2_1: // Block address taken +; CHECK-NEXT: // %b2 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: { ; CHECK-NEXT: nop @@ -110,8 +110,8 @@ ; CHECK-NEXT: loop0(.LBB3_1,#4) ; CHECK-NEXT: } ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .Ltmp3: // Block address taken -; CHECK-NEXT: .LBB3_1: // %b2 +; CHECK-NEXT: .LBB3_1: // Block address taken +; CHECK-NEXT: // %b2 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: { ; CHECK-NEXT: nop @@ -144,8 +144,8 @@ ; CHECK-NEXT: loop0(.LBB4_1,#2) ; CHECK-NEXT: } ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .Ltmp4: // Block address taken -; CHECK-NEXT: .LBB4_1: // %b2 +; CHECK-NEXT: .LBB4_1: // Block address taken +; CHECK-NEXT: // %b2 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: { ; CHECK-NEXT: nop @@ -178,8 +178,8 @@ ; CHECK-NEXT: loop0(.LBB5_1,#2) ; CHECK-NEXT: } ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .Ltmp5: // Block address taken -; CHECK-NEXT: 
.LBB5_1: // %b2 +; CHECK-NEXT: .LBB5_1: // Block address taken +; CHECK-NEXT: // %b2 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: { ; CHECK-NEXT: nop diff --git a/llvm/test/CodeGen/Hexagon/pipeliner/swp-phi-start.mir b/llvm/test/CodeGen/Hexagon/pipeliner/swp-phi-start.mir --- a/llvm/test/CodeGen/Hexagon/pipeliner/swp-phi-start.mir +++ b/llvm/test/CodeGen/Hexagon/pipeliner/swp-phi-start.mir @@ -132,7 +132,7 @@ %14:intregs = COPY %10 J2_loop0r %bb.3, %14, implicit-def $lc0, implicit-def $sa0, implicit-def $usr - bb.3.b2 (address-taken): + bb.3.b2 (machine-block-address-taken): successors: %bb.3(0x7c000000), %bb.4(0x04000000) %1:intregs = PHI %7, %bb.2, %5, %bb.3, post-instr-symbol diff --git a/llvm/test/CodeGen/Hexagon/swp-carried-dep1.mir b/llvm/test/CodeGen/Hexagon/swp-carried-dep1.mir --- a/llvm/test/CodeGen/Hexagon/swp-carried-dep1.mir +++ b/llvm/test/CodeGen/Hexagon/swp-carried-dep1.mir @@ -80,7 +80,7 @@ %23:intregs = COPY %0 J2_loop0r %bb.2, %23, implicit-def $lc0, implicit-def $sa0, implicit-def $usr - bb.2 (address-taken): + bb.2 (machine-block-address-taken): successors: %bb.3, %bb.2 %3:intregs = PHI %2, %bb.1, %10, %bb.2 diff --git a/llvm/test/CodeGen/Hexagon/swp-carried-dep2.mir b/llvm/test/CodeGen/Hexagon/swp-carried-dep2.mir --- a/llvm/test/CodeGen/Hexagon/swp-carried-dep2.mir +++ b/llvm/test/CodeGen/Hexagon/swp-carried-dep2.mir @@ -52,7 +52,7 @@ %11:intregs = IMPLICIT_DEF J2_loop0i %bb.1, 6, implicit-def $lc0, implicit-def $sa0, implicit-def $usr - bb.1 (address-taken): + bb.1 (machine-block-address-taken): successors: %bb.1, %bb.2 %0:intregs = PHI %11, %bb.0, %6, %bb.1 diff --git a/llvm/test/CodeGen/LoongArch/e_flags.ll b/llvm/test/CodeGen/LoongArch/e_flags.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/e_flags.ll @@ -0,0 +1,15 @@ +; RUN: llc --mtriple=loongarch32 --filetype=obj %s -o %t-la32 +; RUN: llvm-readelf -h %t-la32 | FileCheck %s --check-prefix=ILP32D --match-full-lines +; RUN: llc 
--mtriple=loongarch64 --filetype=obj %s -o %t-la64 +; RUN: llvm-readelf -h %t-la64 | FileCheck %s --check-prefix=LP64D --match-full-lines + +;; Note that we have not support the -target-abi option to select specific ABI. +;; See comments in LoongArchELFStreamer.cpp. So here we only check the default behaviour. +;; After -target-abi is supported, we can add more tests. + +; LP64D: Flags: 0x3, LP64, DOUBLE-FLOAT +; ILP32D: Flags: 0x7, ILP32, DOUBLE-FLOAT + +define void @foo() { + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/fabs.ll b/llvm/test/CodeGen/LoongArch/fabs.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/fabs.ll @@ -0,0 +1,56 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA32F +; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32D +; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64F +; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64D + +declare float @llvm.fabs.f32(float) +declare double @llvm.fabs.f64(double) + +define float @fabs_f32(float %a) nounwind { +; LA32F-LABEL: fabs_f32: +; LA32F: # %bb.0: +; LA32F-NEXT: fabs.s $fa0, $fa0 +; LA32F-NEXT: ret +; +; LA32D-LABEL: fabs_f32: +; LA32D: # %bb.0: +; LA32D-NEXT: fabs.s $fa0, $fa0 +; LA32D-NEXT: ret +; +; LA64F-LABEL: fabs_f32: +; LA64F: # %bb.0: +; LA64F-NEXT: fabs.s $fa0, $fa0 +; LA64F-NEXT: ret +; +; LA64D-LABEL: fabs_f32: +; LA64D: # %bb.0: +; LA64D-NEXT: fabs.s $fa0, $fa0 +; LA64D-NEXT: ret + %1 = call float @llvm.fabs.f32(float %a) + ret float %1 +} + +define double @fabs_f64(double %a) nounwind { +; LA32F-LABEL: fabs_f64: +; LA32F: # %bb.0: +; LA32F-NEXT: bstrpick.w $a1, $a1, 30, 0 +; LA32F-NEXT: ret +; +; LA32D-LABEL: fabs_f64: +; LA32D: # %bb.0: +; LA32D-NEXT: fabs.d $fa0, $fa0 +; LA32D-NEXT: ret +; +; LA64F-LABEL: fabs_f64: +; LA64F: # %bb.0: +; 
LA64F-NEXT: bstrpick.d $a0, $a0, 62, 0 +; LA64F-NEXT: ret +; +; LA64D-LABEL: fabs_f64: +; LA64D: # %bb.0: +; LA64D-NEXT: fabs.d $fa0, $fa0 +; LA64D-NEXT: ret + %1 = call double @llvm.fabs.f64(double %a) + ret double %1 +} diff --git a/llvm/test/CodeGen/MIR/Generic/basic-blocks.mir b/llvm/test/CodeGen/MIR/Generic/basic-blocks.mir --- a/llvm/test/CodeGen/MIR/Generic/basic-blocks.mir +++ b/llvm/test/CodeGen/MIR/Generic/basic-blocks.mir @@ -31,19 +31,19 @@ # CHECK-LABEL: name: bar # CHECK: body: # CHECK-NEXT: bb.0.start (align 4): -# CHECK: bb.1 (address-taken): +# CHECK: bb.1 (machine-block-address-taken): name: bar body: | bb.0.start (align 4): - bb.1 (address-taken): + bb.1 (machine-block-address-taken): ... --- # CHECK-LABEL: name: test # CHECK: body: -# CHECK-NEXT: bb.0.start (address-taken, align 4): -# CHECK: bb.1 (address-taken, align 4): +# CHECK-NEXT: bb.0.start (machine-block-address-taken, align 4): +# CHECK: bb.1 (machine-block-address-taken, align 4): name: test body: | - bb.0.start (align 4, address-taken): - bb.1 (address-taken, align 4): + bb.0.start (align 4, machine-block-address-taken): + bb.1 (machine-block-address-taken, align 4): ... diff --git a/llvm/test/CodeGen/MIR/X86/block-address-operands.mir b/llvm/test/CodeGen/MIR/X86/block-address-operands.mir --- a/llvm/test/CodeGen/MIR/X86/block-address-operands.mir +++ b/llvm/test/CodeGen/MIR/X86/block-address-operands.mir @@ -62,7 +62,7 @@ MOV64mr $rip, 1, _, @addr, _, killed $rax JMP64m $rip, 1, _, @addr, _ - bb.1.block (address-taken): + bb.1.block (ir-block-address-taken %ir-block.block): RET64 ... --- @@ -76,7 +76,7 @@ MOV64mr $rip, 1, _, @addr, _, killed $rax JMP64m $rip, 1, _, @addr, _ - bb.1 (address-taken): + bb.1 (%ir-block."quoted block", ir-block-address-taken %ir-block."quoted block"): RET64 ... --- @@ -103,7 +103,7 @@ MOV64mr $rip, 1, _, @addr, _, killed $rax JMP64m $rip, 1, _, @addr, _ - bb.1 (address-taken): + bb.1 (%ir-block.0, ir-block-address-taken %ir-block.0): RET64 ... 
--- @@ -116,6 +116,6 @@ MOV64mr $rip, 1, _, @addr, _, killed $rax JMP64m $rip, 1, _, @addr, _ - bb.1.block (address-taken): + bb.1.block (ir-block-address-taken %ir-block.block): RET64 ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-block-reference-in-blockaddress.mir b/llvm/test/CodeGen/MIR/X86/expected-block-reference-in-blockaddress.mir --- a/llvm/test/CodeGen/MIR/X86/expected-block-reference-in-blockaddress.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-block-reference-in-blockaddress.mir @@ -1,4 +1,5 @@ # RUN: not llc -march=x86-64 -run-pass none -o /dev/null %s 2>&1 | FileCheck %s +# --- | @@ -25,6 +26,6 @@ MOV64mr $rip, 1, _, @addr, _, killed $rax JMP64m $rip, 1, _, @addr, _ - bb.1.block (address-taken): + bb.1.block (ir-block-address-taken %ir-block.block): RET64 ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-function-reference-after-blockaddress.mir b/llvm/test/CodeGen/MIR/X86/expected-function-reference-after-blockaddress.mir --- a/llvm/test/CodeGen/MIR/X86/expected-function-reference-after-blockaddress.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-function-reference-after-blockaddress.mir @@ -25,6 +25,6 @@ MOV64mr $rip, 1, _, @addr, _, killed $rax JMP64m $rip, 1, _, @addr, _ - bb.1.block (address-taken): + bb.1.block: RET64 ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-global-value-after-blockaddress.mir b/llvm/test/CodeGen/MIR/X86/expected-global-value-after-blockaddress.mir --- a/llvm/test/CodeGen/MIR/X86/expected-global-value-after-blockaddress.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-global-value-after-blockaddress.mir @@ -25,6 +25,6 @@ MOV64mr $rip, 1, _, @addr, _, killed $rax JMP64m $rip, 1, _, @addr, _ - bb.1.block (address-taken): + bb.1.block: RET64 ... 
diff --git a/llvm/test/CodeGen/MIR/X86/undefined-ir-block-in-blockaddress.mir b/llvm/test/CodeGen/MIR/X86/undefined-ir-block-in-blockaddress.mir --- a/llvm/test/CodeGen/MIR/X86/undefined-ir-block-in-blockaddress.mir +++ b/llvm/test/CodeGen/MIR/X86/undefined-ir-block-in-blockaddress.mir @@ -25,6 +25,6 @@ MOV64mr $rip, 1, _, @addr, _, killed $rax JMP64m $rip, 1, _, @addr, _ - bb.1.block (address-taken): + bb.1.block (ir-block-address-taken %ir-block.block): RET64 ... diff --git a/llvm/test/CodeGen/MIR/X86/undefined-ir-block-slot-in-blockaddress.mir b/llvm/test/CodeGen/MIR/X86/undefined-ir-block-slot-in-blockaddress.mir --- a/llvm/test/CodeGen/MIR/X86/undefined-ir-block-slot-in-blockaddress.mir +++ b/llvm/test/CodeGen/MIR/X86/undefined-ir-block-slot-in-blockaddress.mir @@ -24,6 +24,6 @@ MOV64mr $rip, 1, _, @addr, _, killed $rax JMP64m $rip, 1, _, @addr, _ - bb.1 (address-taken): + bb.1: RET64 ... diff --git a/llvm/test/CodeGen/PowerPC/scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/scalar_to_vector_shuffle.ll deleted file mode 100644 --- a/llvm/test/CodeGen/PowerPC/scalar_to_vector_shuffle.ll +++ /dev/null @@ -1,138 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ -; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \ -; RUN: FileCheck %s --check-prefix=CHECK-LE-P8 -; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ -; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \ -; RUN: FileCheck %s --check-prefix=CHECK-LE-P9 -; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ -; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \ -; RUN: FileCheck %s --check-prefix=CHECK-BE-P8 -; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ -; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \ -; RUN: FileCheck %s 
--check-prefix=CHECK-BE-P9 - -; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ -; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \ -; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P8 -; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ -; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \ -; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P9 -; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ -; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \ -; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P8 -; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ -; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \ -; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P9 - -define <16 x i8> @test_4_8(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) local_unnamed_addr { -; CHECK-LE-P8-LABEL: test_4_8: -; CHECK-LE-P8: # %bb.0: # %entry -; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI0_0@toc@ha -; CHECK-LE-P8-NEXT: lfiwzx f0, 0, r3 -; CHECK-LE-P8-NEXT: lfdx f1, 0, r4 -; CHECK-LE-P8-NEXT: addi r3, r5, .LCPI0_0@toc@l -; CHECK-LE-P8-NEXT: lxvd2x vs2, 0, r3 -; CHECK-LE-P8-NEXT: xxswapd v2, f0 -; CHECK-LE-P8-NEXT: xxswapd v3, f1 -; CHECK-LE-P8-NEXT: xxswapd v4, vs2 -; CHECK-LE-P8-NEXT: vperm v2, v3, v2, v4 -; CHECK-LE-P8-NEXT: blr -; -; CHECK-LE-P9-LABEL: test_4_8: -; CHECK-LE-P9: # %bb.0: # %entry -; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r3 -; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-LE-P9-NEXT: xxswapd v2, f0 -; CHECK-LE-P9-NEXT: lfd f0, 0(r4) -; CHECK-LE-P9-NEXT: lxv v4, 0(r3) -; CHECK-LE-P9-NEXT: xxswapd v3, f0 -; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4 -; CHECK-LE-P9-NEXT: blr -; -; CHECK-BE-P8-LABEL: test_4_8: -; CHECK-BE-P8: # %bb.0: # %entry -; CHECK-BE-P8-NEXT: lfiwzx f0, 0, r3 -; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI0_0@toc@ha -; CHECK-BE-P8-NEXT: lxsdx v3, 0, r4 -; CHECK-BE-P8-NEXT: addi r3, r5, 
.LCPI0_0@toc@l -; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r3 -; CHECK-BE-P8-NEXT: xxsldwi v2, f0, f0, 1 -; CHECK-BE-P8-NEXT: vperm v2, v2, v3, v4 -; CHECK-BE-P8-NEXT: blr -; -; CHECK-BE-P9-LABEL: test_4_8: -; CHECK-BE-P9: # %bb.0: # %entry -; CHECK-BE-P9-NEXT: lfiwzx f0, 0, r3 -; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; CHECK-BE-P9-NEXT: lxsd v3, 0(r4) -; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-BE-P9-NEXT: lxv v4, 0(r3) -; CHECK-BE-P9-NEXT: xxsldwi v2, f0, f0, 1 -; CHECK-BE-P9-NEXT: vperm v2, v2, v3, v4 -; CHECK-BE-P9-NEXT: blr -; -; CHECK-AIX-64-P8-LABEL: test_4_8: -; CHECK-AIX-64-P8: # %bb.0: # %entry -; CHECK-AIX-64-P8-NEXT: ld r5, L..C0(r2) # %const.0 -; CHECK-AIX-64-P8-NEXT: lfiwzx f0, 0, r3 -; CHECK-AIX-64-P8-NEXT: lxsdx v3, 0, r4 -; CHECK-AIX-64-P8-NEXT: xxsldwi v2, f0, f0, 1 -; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r5 -; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v3, v4 -; CHECK-AIX-64-P8-NEXT: blr -; -; CHECK-AIX-64-P9-LABEL: test_4_8: -; CHECK-AIX-64-P9: # %bb.0: # %entry -; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r3 -; CHECK-AIX-64-P9-NEXT: ld r3, L..C0(r2) # %const.0 -; CHECK-AIX-64-P9-NEXT: lxsd v3, 0(r4) -; CHECK-AIX-64-P9-NEXT: xxsldwi v2, f0, f0, 1 -; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3) -; CHECK-AIX-64-P9-NEXT: vperm v2, v2, v3, v4 -; CHECK-AIX-64-P9-NEXT: blr -; -; CHECK-AIX-32-P8-LABEL: test_4_8: -; CHECK-AIX-32-P8: # %bb.0: # %entry -; CHECK-AIX-32-P8-NEXT: lwz r5, 4(r4) -; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3 -; CHECK-AIX-32-P8-NEXT: stw r5, -16(r1) -; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r4) -; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 -; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1) -; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 -; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3 -; CHECK-AIX-32-P8-NEXT: lwz r3, L..C0(r2) # %const.0 -; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r4 -; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3 -; CHECK-AIX-32-P8-NEXT: xxmrghw v3, vs1, vs0 -; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4 -; CHECK-AIX-32-P8-NEXT: blr -; -; CHECK-AIX-32-P9-LABEL: 
test_4_8: -; CHECK-AIX-32-P9: # %bb.0: # %entry -; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3 -; CHECK-AIX-32-P9-NEXT: lwz r3, 4(r4) -; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) -; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r4) -; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) -; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1) -; CHECK-AIX-32-P9-NEXT: lwz r3, L..C0(r2) # %const.0 -; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) -; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3) -; CHECK-AIX-32-P9-NEXT: xxmrghw v3, vs1, vs0 -; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4 -; CHECK-AIX-32-P9-NEXT: blr -entry: - %0 = load <4 x i8>, ptr %a, align 4 - %bc1 = bitcast <4 x i8> %0 to i32 - %vecinit3 = insertelement <4 x i32> poison, i32 %bc1, i64 0 - %1 = load <8 x i8>, ptr %b, align 8 - %bc2 = bitcast <8 x i8> %1 to i64 - %vecinit6 = insertelement <2 x i64> undef, i64 %bc2, i64 0 - %2 = bitcast <4 x i32> %vecinit3 to <16 x i8> - %3 = bitcast <2 x i64> %vecinit6 to <16 x i8> - %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> - ret <16 x i8> %shuffle -} diff --git a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll @@ -0,0 +1,2090 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-LE-P8 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-LE-P9 +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-BE-P8 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 
-ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-BE-P9 + +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P8 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P9 +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P8 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P9 + +define <16 x i8> @test_v16i8_v16i8(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) { +; CHECK-LE-P8-LABEL: test_v16i8_v16i8: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lbz r3, 0(r3) +; CHECK-LE-P8-NEXT: lbz r4, 0(r4) +; CHECK-LE-P8-NEXT: mtvsrd v2, r3 +; CHECK-LE-P8-NEXT: mtvsrd v3, r4 +; CHECK-LE-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v16i8_v16i8: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsibzx v2, 0, r3 +; CHECK-LE-P9-NEXT: lxsibzx v3, 0, r4 +; CHECK-LE-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v16i8_v16i8: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI0_0@toc@ha +; CHECK-BE-P8-NEXT: lbz r4, 0(r4) +; CHECK-BE-P8-NEXT: lbz r3, 0(r3) +; CHECK-BE-P8-NEXT: addi r5, r5, .LCPI0_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v2, 0, r5 +; CHECK-BE-P8-NEXT: mtvsrwz v3, r4 +; CHECK-BE-P8-NEXT: mtvsrwz v4, r3 +; CHECK-BE-P8-NEXT: vperm v2, v4, v3, v2 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v16i8_v16i8: +; CHECK-BE-P9: # %bb.0: # 
%entry +; CHECK-BE-P9-NEXT: addis r5, r2, .LCPI0_0@toc@ha +; CHECK-BE-P9-NEXT: lxsibzx v3, 0, r4 +; CHECK-BE-P9-NEXT: lxsibzx v4, 0, r3 +; CHECK-BE-P9-NEXT: addi r5, r5, .LCPI0_0@toc@l +; CHECK-BE-P9-NEXT: lxv v2, 0(r5) +; CHECK-BE-P9-NEXT: vperm v2, v4, v3, v2 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v16i8_v16i8: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r5, L..C0(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: lbz r4, 0(r4) +; CHECK-AIX-64-P8-NEXT: lbz r3, 0(r3) +; CHECK-AIX-64-P8-NEXT: mtvsrwz v3, r4 +; CHECK-AIX-64-P8-NEXT: lxvw4x v2, 0, r5 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3 +; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v3, v2 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v16i8_v16i8: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: ld r5, L..C0(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: lxsibzx v3, 0, r4 +; CHECK-AIX-64-P9-NEXT: lxsibzx v4, 0, r3 +; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r5) +; CHECK-AIX-64-P9-NEXT: vperm v2, v4, v3, v2 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v16i8_v16i8: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r5, L..C0(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lbz r4, 0(r4) +; CHECK-AIX-32-P8-NEXT: lbz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: mtvsrwz v3, r4 +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r5 +; CHECK-AIX-32-P8-NEXT: mtvsrwz v4, r3 +; CHECK-AIX-32-P8-NEXT: vperm v2, v4, v3, v2 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v16i8_v16i8: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lwz r5, L..C0(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxsibzx v3, 0, r4 +; CHECK-AIX-32-P9-NEXT: lxsibzx v4, 0, r3 +; CHECK-AIX-32-P9-NEXT: lxv v2, 0(r5) +; CHECK-AIX-32-P9-NEXT: vperm v2, v4, v3, v2 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <1 x i8>, ptr %a, align 4 + %bc1 = bitcast <1 x i8> %0 to i8 + %vecinit3 = insertelement <16 x i8> poison, i8 %bc1, i64 0 + %1 = load <1 x i8>, ptr %b, align 8 + %bc2 = bitcast <1 x i8> %1 
to i8 + %vecinit6 = insertelement <16 x i8> undef, i8 %bc2, i64 0 + %2 = bitcast <16 x i8> %vecinit3 to <16 x i8> + %3 = bitcast <16 x i8> %vecinit6 to <16 x i8> + %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v16i8_none(<16 x i8> %a, i8 %b) { +; CHECK-LE-P8-LABEL: test_v16i8_none: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; CHECK-LE-P8-NEXT: mtvsrd v4, r5 +; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI1_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: xxswapd v3, vs0 +; CHECK-LE-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v16i8_none: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtvsrwz v3, r5 +; CHECK-LE-P9-NEXT: vinsertb v2, v3, 15 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v16i8_none: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; CHECK-BE-P8-NEXT: mtvsrwz v4, r5 +; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI1_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-BE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v16i8_none: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: mtvsrwz v3, r5 +; CHECK-BE-P9-NEXT: vinsertb v2, v3, 0 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v16i8_none: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r4, L..C1(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3 +; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v16i8_none: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: mtvsrwz v3, r3 +; CHECK-AIX-64-P9-NEXT: vinsertb v2, v3, 0 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v16i8_none: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r4, L..C1(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: 
mtvsrwz v4, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v16i8_none: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: mtvsrwz v3, r3 +; CHECK-AIX-32-P9-NEXT: vinsertb v2, v3, 0 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %vecins = insertelement <16 x i8> %a, i8 %b, i32 0 + ret <16 x i8> %vecins +} + +define <16 x i8> @test_none_v16i8(i8 %arg, ptr nocapture noundef readonly %b) { +; CHECK-LE-P8-LABEL: test_none_v16i8: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI2_0@toc@ha +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: mtvsrd v4, r3 +; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI2_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs1, 0, r5 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: xxswapd v3, vs1 +; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_none_v16i8: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtvsrd v3, r3 +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; CHECK-LE-P9-NEXT: lxv v2, 0(r4) +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI2_0@toc@l +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_none_v16i8: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI2_0@toc@ha +; CHECK-BE-P8-NEXT: lxvw4x v2, 0, r4 +; CHECK-BE-P8-NEXT: mtvsrwz v4, r3 +; CHECK-BE-P8-NEXT: addi r5, r5, .LCPI2_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r5 +; CHECK-BE-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_none_v16i8: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: mtvsrwz v3, r3 +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; CHECK-BE-P9-NEXT: lxv v2, 0(r4) +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI2_0@toc@l +; CHECK-BE-P9-NEXT: lxv v4, 0(r3) +; CHECK-BE-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-BE-P9-NEXT: blr +; +; 
CHECK-AIX-64-P8-LABEL: test_none_v16i8: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r5, L..C2(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: lxvw4x v2, 0, r4 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3 +; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r5 +; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_none_v16i8: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: mtvsrwz v3, r3 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C1(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4) +; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-64-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_none_v16i8: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r4 +; CHECK-AIX-32-P8-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r5 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_none_v16i8: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxv v2, 0(r4) +; CHECK-AIX-32-P9-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1) +; CHECK-AIX-32-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs = load <16 x i8>, ptr %b, align 4 + %rhs = insertelement <16 x i8> undef, i8 %arg, i32 0 + %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v16i8_v8i16(i16 %arg, i8 %arg1) { +; CHECK-LE-P8-LABEL: test_v16i8_v8i16: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r4 +; CHECK-LE-P8-NEXT: mtfprd f1, r3 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: xxswapd v3, vs1 +; CHECK-LE-P8-NEXT: vmrglh v2, v3, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v16i8_v8i16: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r4 +; CHECK-LE-P9-NEXT: xxswapd v2, vs0 +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; 
CHECK-LE-P9-NEXT: xxswapd v3, vs0 +; CHECK-LE-P9-NEXT: vmrglh v2, v3, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v16i8_v8i16: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: sldi r4, r4, 56 +; CHECK-BE-P8-NEXT: sldi r3, r3, 48 +; CHECK-BE-P8-NEXT: mtvsrd v2, r4 +; CHECK-BE-P8-NEXT: mtvsrd v3, r3 +; CHECK-BE-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v16i8_v8i16: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: sldi r4, r4, 56 +; CHECK-BE-P9-NEXT: sldi r3, r3, 48 +; CHECK-BE-P9-NEXT: mtvsrd v2, r4 +; CHECK-BE-P9-NEXT: mtvsrd v3, r3 +; CHECK-BE-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v16i8_v8i16: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 56 +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48 +; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r4 +; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v16i8_v8i16: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: sldi r4, r4, 56 +; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 48 +; CHECK-AIX-64-P9-NEXT: mtvsrd v2, r4 +; CHECK-AIX-64-P9-NEXT: mtvsrd v3, r3 +; CHECK-AIX-64-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v16i8_v8i16: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -32 +; CHECK-AIX-32-P8-NEXT: stb r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r5 +; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v16i8_v8i16: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stb r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1) +; CHECK-AIX-32-P9-NEXT: vmrghh v2, v2, 
v3 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs = insertelement <16 x i8> undef, i8 %arg1, i32 0 + %rhs.tmp = insertelement <8 x i16> undef, i16 %arg, i32 0 + %rhs = bitcast <8 x i16> %rhs.tmp to <16 x i8> + %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v8i16_v16i8(i16 %arg, i8 %arg1) { +; CHECK-LE-P8-LABEL: test_v8i16_v16i8: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r4 +; CHECK-LE-P8-NEXT: mtfprd f1, r3 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: xxswapd v3, vs1 +; CHECK-LE-P8-NEXT: vmrglh v2, v2, v3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v8i16_v16i8: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r4 +; CHECK-LE-P9-NEXT: xxswapd v2, vs0 +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: xxswapd v3, vs0 +; CHECK-LE-P9-NEXT: vmrglh v2, v2, v3 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v8i16_v16i8: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: sldi r4, r4, 56 +; CHECK-BE-P8-NEXT: sldi r3, r3, 48 +; CHECK-BE-P8-NEXT: mtvsrd v2, r4 +; CHECK-BE-P8-NEXT: mtvsrd v3, r3 +; CHECK-BE-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v8i16_v16i8: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: sldi r4, r4, 56 +; CHECK-BE-P9-NEXT: sldi r3, r3, 48 +; CHECK-BE-P9-NEXT: mtvsrd v2, r4 +; CHECK-BE-P9-NEXT: mtvsrd v3, r3 +; CHECK-BE-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v8i16_v16i8: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 56 +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48 +; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r4 +; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v8i16_v16i8: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: sldi r4, r4, 56 +; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 48 +; 
CHECK-AIX-64-P9-NEXT: mtvsrd v2, r4 +; CHECK-AIX-64-P9-NEXT: mtvsrd v3, r3 +; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v8i16_v16i8: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -32 +; CHECK-AIX-32-P8-NEXT: stb r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r5 +; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v8i16_v16i8: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stb r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1) +; CHECK-AIX-32-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %rhs = insertelement <16 x i8> undef, i8 %arg1, i32 0 + %lhs.tmp = insertelement <8 x i16> undef, i16 %arg, i32 0 + %lhs = bitcast <8 x i16> %lhs.tmp to <16 x i8> + %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_none_v8i16(i16 %arg, ptr nocapture noundef readonly %b) { +; CHECK-LE-P8-LABEL: test_none_v8i16: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI5_0@toc@ha +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: mtvsrd v4, r3 +; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI5_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs1, 0, r5 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: xxswapd v3, vs1 +; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_none_v8i16: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtvsrd v3, r3 +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI5_0@toc@ha +; CHECK-LE-P9-NEXT: lxv v2, 0(r4) +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI5_0@toc@l +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4 +; 
CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_none_v8i16: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI5_0@toc@ha +; CHECK-BE-P8-NEXT: lxvw4x v2, 0, r4 +; CHECK-BE-P8-NEXT: mtvsrwz v4, r3 +; CHECK-BE-P8-NEXT: addi r5, r5, .LCPI5_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r5 +; CHECK-BE-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_none_v8i16: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: mtvsrwz v3, r3 +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI5_0@toc@ha +; CHECK-BE-P9-NEXT: lxv v2, 0(r4) +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI5_0@toc@l +; CHECK-BE-P9-NEXT: lxv v4, 0(r3) +; CHECK-BE-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_none_v8i16: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r5, L..C3(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: lxvw4x v2, 0, r4 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3 +; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r5 +; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_none_v8i16: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: mtvsrwz v3, r3 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C2(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4) +; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-64-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_none_v8i16: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r4 +; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r5 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_none_v8i16: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxv v2, 0(r4) +; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1) +; CHECK-AIX-32-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-32-P9-NEXT: blr +entry: 
+ %lhs = load <16 x i8>, ptr %b, align 4 + %rhs.tmp = insertelement <8 x i16> undef, i16 %arg, i32 0 + %rhs = bitcast <8 x i16> %rhs.tmp to <16 x i8> + %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> + ret <16 x i8> %shuffle +} + +define <8 x i16> @test_v8i16_none(<8 x i16> %a, i16 %b) { +; CHECK-LE-P8-LABEL: test_v8i16_none: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI6_0@toc@ha +; CHECK-LE-P8-NEXT: mtvsrd v4, r5 +; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI6_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: xxswapd v3, vs0 +; CHECK-LE-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v8i16_none: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtvsrwz v3, r5 +; CHECK-LE-P9-NEXT: vinserth v2, v3, 14 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v8i16_none: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI6_0@toc@ha +; CHECK-BE-P8-NEXT: mtvsrwz v4, r5 +; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI6_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-BE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v8i16_none: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: mtvsrwz v3, r5 +; CHECK-BE-P9-NEXT: vinserth v2, v3, 0 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v8i16_none: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r4, L..C4(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3 +; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v8i16_none: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: mtvsrwz v3, r3 +; CHECK-AIX-64-P9-NEXT: vinserth v2, v3, 0 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v8i16_none: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r4, L..C2(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: sth r3, 
-16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v8i16_none: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: mtvsrwz v3, r3 +; CHECK-AIX-32-P9-NEXT: vinserth v2, v3, 0 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %vecins = insertelement <8 x i16> %a, i16 %b, i32 0 + ret <8 x i16> %vecins +} + +define <16 x i8> @test_v16i8_v4i32(i8 %arg, i32 %arg1, <16 x i8> %a, <4 x i32> %b) { +; CHECK-LE-P8-LABEL: test_v16i8_v4i32: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: xxswapd v3, vs1 +; CHECK-LE-P8-NEXT: vmrglb v2, v3, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v16i8_v4i32: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: mtvsrws v3, r4 +; CHECK-LE-P9-NEXT: xxswapd v2, vs0 +; CHECK-LE-P9-NEXT: vmrglb v2, v3, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v16i8_v4i32: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: sldi r3, r3, 56 +; CHECK-BE-P8-NEXT: sldi r4, r4, 32 +; CHECK-BE-P8-NEXT: mtvsrd v2, r3 +; CHECK-BE-P8-NEXT: mtvsrd v3, r4 +; CHECK-BE-P8-NEXT: vmrghb v2, v2, v3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v16i8_v4i32: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: sldi r3, r3, 56 +; CHECK-BE-P9-NEXT: mtvsrws v3, r4 +; CHECK-BE-P9-NEXT: mtvsrd v2, r3 +; CHECK-BE-P9-NEXT: vmrghb v2, v2, v3 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v16i8_v4i32: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 56 +; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 32 +; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r3 +; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r4 +; CHECK-AIX-64-P8-NEXT: vmrghb v2, v2, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: 
test_v16i8_v4i32: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 56 +; CHECK-AIX-64-P9-NEXT: mtvsrws v3, r4 +; CHECK-AIX-64-P9-NEXT: mtvsrd v2, r3 +; CHECK-AIX-64-P9-NEXT: vmrghb v2, v2, v3 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v16i8_v4i32: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r5 +; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: vmrghb v2, v2, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v16i8_v4i32: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, -32(r1) +; CHECK-AIX-32-P9-NEXT: vmrghb v2, v2, v3 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <16 x i8> %a, i8 %arg, i32 0 + %lhs = bitcast <16 x i8> %lhs.tmp to <16 x i8> + %rhs.tmp = insertelement <4 x i32> %b, i32 %arg1, i32 0 + %rhs = bitcast <4 x i32> %rhs.tmp to <16 x i8> + %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v4i32_v16i8(i32 %arg, i8 %arg1) { +; CHECK-LE-P8-LABEL: test_v4i32_v16i8: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r4 +; CHECK-LE-P8-NEXT: mtfprd f1, r3 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: xxswapd v3, vs1 +; CHECK-LE-P8-NEXT: vmrglh v2, v2, v3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v4i32_v16i8: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r4 +; CHECK-LE-P9-NEXT: mtvsrws v3, r3 +; CHECK-LE-P9-NEXT: xxswapd v2, vs0 +; CHECK-LE-P9-NEXT: vmrglh v2, v2, v3 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v4i32_v16i8: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: sldi r4, r4, 56 
+; CHECK-BE-P8-NEXT: sldi r3, r3, 32 +; CHECK-BE-P8-NEXT: mtvsrd v2, r4 +; CHECK-BE-P8-NEXT: mtvsrd v3, r3 +; CHECK-BE-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v4i32_v16i8: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: sldi r4, r4, 56 +; CHECK-BE-P9-NEXT: mtvsrws v3, r3 +; CHECK-BE-P9-NEXT: mtvsrd v2, r4 +; CHECK-BE-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v4i32_v16i8: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 56 +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 32 +; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r4 +; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v4i32_v16i8: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: sldi r4, r4, 56 +; CHECK-AIX-64-P9-NEXT: mtvsrws v3, r3 +; CHECK-AIX-64-P9-NEXT: mtvsrd v2, r4 +; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v4i32_v16i8: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -32 +; CHECK-AIX-32-P8-NEXT: stb r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r5 +; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v4i32_v16i8: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stb r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1) +; CHECK-AIX-32-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %rhs = insertelement <16 x i8> undef, i8 %arg1, i32 0 + %lhs.tmp = insertelement <4 x i32> undef, i32 %arg, i32 0 + %lhs = bitcast <4 x i32> %lhs.tmp to <16 x i8> + %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> + ret <16 x 
i8> %shuffle +} + +define <4 x i32> @test_none_v4i32(<4 x i32> %a, i64 %b) { +; CHECK-LE-P8-LABEL: test_none_v4i32: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI9_0@toc@ha +; CHECK-LE-P8-NEXT: mtvsrwz v4, r5 +; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI9_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI9_1@toc@ha +; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI9_1@toc@l +; CHECK-LE-P8-NEXT: xxswapd v3, vs0 +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P8-NEXT: xxswapd v3, vs0 +; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_none_v4i32: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprwz f0, r5 +; CHECK-LE-P9-NEXT: xxinsertw v2, vs0, 8 +; CHECK-LE-P9-NEXT: xxinsertw v2, vs0, 0 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_none_v4i32: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI9_0@toc@ha +; CHECK-BE-P8-NEXT: mtvsrwz v4, r5 +; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI9_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI9_1@toc@ha +; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI9_1@toc@l +; CHECK-BE-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-BE-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_none_v4i32: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: mtfprwz f0, r5 +; CHECK-BE-P9-NEXT: xxinsertw v2, vs0, 4 +; CHECK-BE-P9-NEXT: xxinsertw v2, vs0, 12 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_none_v4i32: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r4, L..C5(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3 +; CHECK-AIX-64-P8-NEXT: ld r3, L..C6(r2) # %const.1 +; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v4, v3 +; 
CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_none_v4i32: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P9-NEXT: xxinsertw v2, vs0, 4 +; CHECK-AIX-64-P9-NEXT: xxinsertw v2, vs0, 12 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_none_v4i32: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C3(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: stw r4, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r4 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C4(r2) # %const.1 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_none_v4i32: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: mtfprwz f0, r4 +; CHECK-AIX-32-P9-NEXT: xxinsertw v2, vs0, 4 +; CHECK-AIX-32-P9-NEXT: xxinsertw v2, vs0, 12 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %conv = trunc i64 %b to i32 + %vecins = insertelement <4 x i32> %a, i32 %conv, i32 1 + %vecins2 = insertelement <4 x i32> %vecins, i32 %conv, i32 3 + ret <4 x i32> %vecins2 +} + +define <16 x i8> @test_v4i32_none(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) { +; CHECK-LE-P8-LABEL: test_v4i32_none: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI10_0@toc@ha +; CHECK-LE-P8-NEXT: lbzx r4, 0, r4 +; CHECK-LE-P8-NEXT: lxsiwzx v3, 0, r3 +; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI10_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5 +; CHECK-LE-P8-NEXT: mtvsrwz v2, r4 +; CHECK-LE-P8-NEXT: vspltb v2, v2, 7 +; CHECK-LE-P8-NEXT: xxswapd v4, vs0 +; CHECK-LE-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v4i32_none: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI10_0@toc@ha +; CHECK-LE-P9-NEXT: lxsibzx 
v3, 0, r4 +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI10_0@toc@l +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: vspltb v3, v3, 7 +; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v4i32_none: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lbzx r4, 0, r4 +; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI10_0@toc@ha +; CHECK-BE-P8-NEXT: lxsiwzx v3, 0, r3 +; CHECK-BE-P8-NEXT: mtvsrwz v2, r4 +; CHECK-BE-P8-NEXT: addi r4, r5, .LCPI10_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r4 +; CHECK-BE-P8-NEXT: vspltb v2, v2, 7 +; CHECK-BE-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v4i32_none: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI10_0@toc@ha +; CHECK-BE-P9-NEXT: lxsibzx v3, 0, r4 +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI10_0@toc@l +; CHECK-BE-P9-NEXT: lxv v4, 0(r3) +; CHECK-BE-P9-NEXT: vspltb v3, v3, 7 +; CHECK-BE-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v4i32_none: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lbzx r4, 0, r4 +; CHECK-AIX-64-P8-NEXT: ld r5, L..C7(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: lxsiwzx v3, 0, r3 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r4 +; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-AIX-64-P8-NEXT: vspltb v2, v2, 7 +; CHECK-AIX-64-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v4i32_none: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C3(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: lxsibzx v3, 0, r4 +; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-64-P9-NEXT: vspltb v3, v3, 7 +; CHECK-AIX-64-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v4i32_none: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lbzx r4, 0, r4 +; CHECK-AIX-32-P8-NEXT: lwz r5, L..C5(r2) # 
%const.0 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: mtvsrwz v2, r4 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-AIX-32-P8-NEXT: vspltb v2, v2, 7 +; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v4i32_none: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C1(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxsibzx v3, 0, r4 +; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-32-P9-NEXT: vspltb v3, v3, 7 +; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <4 x i8>, ptr %a, align 4 + %bc1 = bitcast <4 x i8> %0 to i32 + %vecinit3 = insertelement <4 x i32> poison, i32 %bc1, i64 0 + %1 = load <1 x i8>, ptr %b, align 8 + %bc2 = bitcast <1 x i8> %1 to i8 + %vecinit6 = insertelement <16 x i8> undef, i8 %bc2, i64 0 + %2 = bitcast <4 x i32> %vecinit3 to <16 x i8> + %3 = bitcast <16 x i8> %vecinit6 to <16 x i8> + %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v16i8_v2i64(i8 %arg, i64 %arg1, <16 x i8> %a, <2 x i64> %b) { +; CHECK-LE-P8-LABEL: test_v16i8_v2i64: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: xxswapd v3, vs1 +; CHECK-LE-P8-NEXT: vmrglb v2, v3, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v16i8_v2i64: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: xxswapd v2, vs0 +; CHECK-LE-P9-NEXT: mtfprd f0, r4 +; CHECK-LE-P9-NEXT: xxswapd v3, vs0 +; CHECK-LE-P9-NEXT: vmrglb v2, v3, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v16i8_v2i64: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: sldi r3, r3, 56 +; CHECK-BE-P8-NEXT: mtvsrd v3, r4 +; CHECK-BE-P8-NEXT: mtvsrd v2, r3 +; CHECK-BE-P8-NEXT: vmrghb v2, v2, v3 +; CHECK-BE-P8-NEXT: 
blr +; +; CHECK-BE-P9-LABEL: test_v16i8_v2i64: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: sldi r3, r3, 56 +; CHECK-BE-P9-NEXT: mtvsrd v3, r4 +; CHECK-BE-P9-NEXT: mtvsrd v2, r3 +; CHECK-BE-P9-NEXT: vmrghb v2, v2, v3 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v16i8_v2i64: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 56 +; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r4 +; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r3 +; CHECK-AIX-64-P8-NEXT: vmrghb v2, v2, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v16i8_v2i64: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 56 +; CHECK-AIX-64-P9-NEXT: mtvsrd v3, r4 +; CHECK-AIX-64-P9-NEXT: mtvsrd v2, r3 +; CHECK-AIX-64-P9-NEXT: vmrghb v2, v2, v3 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v16i8_v2i64: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r5 +; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: vmrghb v2, v2, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v16i8_v2i64: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, -32(r1) +; CHECK-AIX-32-P9-NEXT: vmrghb v2, v2, v3 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <16 x i8> %a, i8 %arg, i32 0 + %lhs = bitcast <16 x i8> %lhs.tmp to <16 x i8> + %rhs.tmp = insertelement <2 x i64> %b, i64 %arg1, i32 0 + %rhs = bitcast <2 x i64> %rhs.tmp to <16 x i8> + %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v2i64_v16i8(i64 %arg, i8 %arg1) { +; CHECK-LE-P8-LABEL: test_v2i64_v16i8: +; CHECK-LE-P8: # %bb.0: # %entry +; 
CHECK-LE-P8-NEXT: mtfprd f0, r4 +; CHECK-LE-P8-NEXT: mtfprd f1, r3 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: xxswapd v3, vs1 +; CHECK-LE-P8-NEXT: vmrglh v2, v2, v3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v2i64_v16i8: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r4 +; CHECK-LE-P9-NEXT: xxswapd v2, vs0 +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: xxswapd v3, vs0 +; CHECK-LE-P9-NEXT: vmrglh v2, v2, v3 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v2i64_v16i8: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: sldi r4, r4, 56 +; CHECK-BE-P8-NEXT: mtvsrd v3, r3 +; CHECK-BE-P8-NEXT: mtvsrd v2, r4 +; CHECK-BE-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v2i64_v16i8: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: sldi r4, r4, 56 +; CHECK-BE-P9-NEXT: mtvsrd v3, r3 +; CHECK-BE-P9-NEXT: mtvsrd v2, r4 +; CHECK-BE-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v2i64_v16i8: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 56 +; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3 +; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r4 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v2i64_v16i8: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: sldi r4, r4, 56 +; CHECK-AIX-64-P9-NEXT: mtvsrd v3, r3 +; CHECK-AIX-64-P9-NEXT: mtvsrd v2, r4 +; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v2i64_v16i8: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: stb r5, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r4 +; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v2i64_v16i8: +; 
CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stb r5, -32(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1) +; CHECK-AIX-32-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %rhs = insertelement <16 x i8> undef, i8 %arg1, i32 0 + %lhs.tmp = insertelement <2 x i64> undef, i64 %arg, i32 0 + %lhs = bitcast <2 x i64> %lhs.tmp to <16 x i8> + %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> + ret <16 x i8> %shuffle +} + +define dso_local <16 x i8> @test_1_2(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) local_unnamed_addr { +; CHECK-LE-P8-LABEL: test_1_2: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI13_0@toc@ha +; CHECK-LE-P8-NEXT: lbzx r3, 0, r3 +; CHECK-LE-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI13_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5 +; CHECK-LE-P8-NEXT: mtvsrwz v2, r3 +; CHECK-LE-P8-NEXT: vspltb v2, v2, 7 +; CHECK-LE-P8-NEXT: xxswapd v4, vs0 +; CHECK-LE-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_1_2: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsibzx v2, 0, r3 +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI13_0@toc@ha +; CHECK-LE-P9-NEXT: lxsd v3, 0(r4) +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI13_0@toc@l +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: vspltb v2, v2, 7 +; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_1_2: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lbzx r3, 0, r3 +; CHECK-BE-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-BE-P8-NEXT: mtvsrwz v2, r3 +; CHECK-BE-P8-NEXT: vspltb v2, v2, 7 +; CHECK-BE-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_1_2: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsibzx v2, 0, r3 +; CHECK-BE-P9-NEXT: lxsd v3, 0(r4) +; CHECK-BE-P9-NEXT: vspltb v2, v2, 7 +; CHECK-BE-P9-NEXT: 
vmrghh v2, v2, v3 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_1_2: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lbzx r3, 0, r3 +; CHECK-AIX-64-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r3 +; CHECK-AIX-64-P8-NEXT: vspltb v2, v2, 7 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_1_2: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsibzx v2, 0, r3 +; CHECK-AIX-64-P9-NEXT: lxsd v3, 0(r4) +; CHECK-AIX-64-P9-NEXT: vspltb v2, v2, 7 +; CHECK-AIX-64-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_1_2: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lbzx r3, 0, r3 +; CHECK-AIX-32-P8-NEXT: lwz r5, L..C6(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: mtvsrwz v2, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-AIX-32-P8-NEXT: vspltb v2, v2, 7 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_1_2: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxsibzx v2, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C2(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-32-P9-NEXT: vspltb v2, v2, 7 +; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <1 x i8>, ptr %a, align 4 + %bc1 = bitcast <1 x i8> %0 to i8 + %vecinit3 = insertelement <16 x i8> poison, i8 %bc1, i64 0 + %1 = load <2 x i8>, ptr %b, align 8 + %bc2 = bitcast <2 x i8> %1 to i16 + %vecinit6 = insertelement <8 x i16> undef, i16 %bc2, i64 0 + %2 = bitcast <16 x i8> %vecinit3 to <16 x i8> + %3 = bitcast <8 x i16> %vecinit6 to <16 x i8> + %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_none_v2i64(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) { +; 
CHECK-LE-P8-LABEL: test_none_v2i64: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI14_0@toc@ha +; CHECK-LE-P8-NEXT: lbzx r3, 0, r3 +; CHECK-LE-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI14_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5 +; CHECK-LE-P8-NEXT: mtvsrwz v2, r3 +; CHECK-LE-P8-NEXT: vspltb v2, v2, 7 +; CHECK-LE-P8-NEXT: xxswapd v4, vs0 +; CHECK-LE-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_none_v2i64: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsibzx v2, 0, r3 +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI14_0@toc@ha +; CHECK-LE-P9-NEXT: lxsd v3, 0(r4) +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI14_0@toc@l +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: vspltb v2, v2, 7 +; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_none_v2i64: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lbzx r3, 0, r3 +; CHECK-BE-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-BE-P8-NEXT: mtvsrwz v2, r3 +; CHECK-BE-P8-NEXT: vspltb v2, v2, 7 +; CHECK-BE-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_none_v2i64: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsibzx v2, 0, r3 +; CHECK-BE-P9-NEXT: lxsd v3, 0(r4) +; CHECK-BE-P9-NEXT: vspltb v2, v2, 7 +; CHECK-BE-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_none_v2i64: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lbzx r3, 0, r3 +; CHECK-AIX-64-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r3 +; CHECK-AIX-64-P8-NEXT: vspltb v2, v2, 7 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_none_v2i64: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsibzx v2, 0, r3 +; CHECK-AIX-64-P9-NEXT: lxsd v3, 0(r4) +; CHECK-AIX-64-P9-NEXT: vspltb v2, v2, 7 +; CHECK-AIX-64-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P9-NEXT: blr +; 
+; CHECK-AIX-32-P8-LABEL: test_none_v2i64: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lbzx r3, 0, r3 +; CHECK-AIX-32-P8-NEXT: lwz r5, L..C7(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: mtvsrwz v2, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-AIX-32-P8-NEXT: vspltb v2, v2, 7 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_none_v2i64: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxsibzx v2, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C3(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-32-P9-NEXT: vspltb v2, v2, 7 +; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <1 x i8>, ptr %a, align 4 + %bc1 = bitcast <1 x i8> %0 to i8 + %vecinit3 = insertelement <16 x i8> poison, i8 %bc1, i64 0 + %1 = load <2 x i8>, ptr %b, align 8 + %bc2 = bitcast <2 x i8> %1 to i16 + %vecinit6 = insertelement <8 x i16> undef, i16 %bc2, i64 0 + %2 = bitcast <16 x i8> %vecinit3 to <16 x i8> + %3 = bitcast <8 x i16> %vecinit6 to <16 x i8> + %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v2i64_none(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) { +; CHECK-LE-P8-LABEL: test_v2i64_none: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI15_0@toc@ha +; CHECK-LE-P8-NEXT: lbzx r4, 0, r4 +; CHECK-LE-P8-NEXT: lxsdx v3, 0, r3 +; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI15_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5 +; CHECK-LE-P8-NEXT: mtvsrwz v2, r4 +; CHECK-LE-P8-NEXT: vspltb v2, v2, 7 +; CHECK-LE-P8-NEXT: xxswapd v4, vs0 +; CHECK-LE-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v2i64_none: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsd v2, 0(r3) +; CHECK-LE-P9-NEXT: addis r3, r2, 
.LCPI15_0@toc@ha +; CHECK-LE-P9-NEXT: lxsibzx v3, 0, r4 +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI15_0@toc@l +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: vspltb v3, v3, 7 +; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v2i64_none: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lbzx r4, 0, r4 +; CHECK-BE-P8-NEXT: lxsdx v3, 0, r3 +; CHECK-BE-P8-NEXT: mtvsrwz v2, r4 +; CHECK-BE-P8-NEXT: vspltb v2, v2, 7 +; CHECK-BE-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v2i64_none: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsibzx v3, 0, r4 +; CHECK-BE-P9-NEXT: lxsd v2, 0(r3) +; CHECK-BE-P9-NEXT: vspltb v3, v3, 7 +; CHECK-BE-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v2i64_none: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lbzx r4, 0, r4 +; CHECK-AIX-64-P8-NEXT: lxsdx v3, 0, r3 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r4 +; CHECK-AIX-64-P8-NEXT: vspltb v2, v2, 7 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v2i64_none: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsibzx v3, 0, r4 +; CHECK-AIX-64-P9-NEXT: lxsd v2, 0(r3) +; CHECK-AIX-64-P9-NEXT: vspltb v3, v3, 7 +; CHECK-AIX-64-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v2i64_none: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lbzx r4, 0, r4 +; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-AIX-32-P8-NEXT: mtvsrwz v2, r4 +; CHECK-AIX-32-P8-NEXT: xxspltw v3, vs0, 1 +; CHECK-AIX-32-P8-NEXT: vspltb v2, v2, 7 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v2i64_none: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxsibzx v3, 0, r4 +; CHECK-AIX-32-P9-NEXT: lxvwsx v2, 0, r3 +; CHECK-AIX-32-P9-NEXT: vspltb v3, v3, 7 +; CHECK-AIX-32-P9-NEXT: vmrghh v2, 
v2, v3 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <8 x i8>, ptr %a, align 4 + %bc1 = bitcast <8 x i8> %0 to i64 + %vecinit3 = insertelement <2 x i64> poison, i64 %bc1, i64 0 + %1 = load <1 x i8>, ptr %b, align 8 + %bc2 = bitcast <1 x i8> %1 to i8 + %vecinit6 = insertelement <16 x i8> undef, i8 %bc2, i64 0 + %2 = bitcast <2 x i64> %vecinit3 to <16 x i8> + %3 = bitcast <16 x i8> %vecinit6 to <16 x i8> + %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v8i16_v8i16rhs(i16 %arg, i16 %arg1) { +; CHECK-LE-P8-LABEL: test_v8i16_v8i16rhs: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtvsrd v2, r3 +; CHECK-LE-P8-NEXT: mtvsrd v3, r4 +; CHECK-LE-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v8i16_v8i16rhs: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtvsrd v2, r3 +; CHECK-LE-P9-NEXT: mtvsrd v3, r4 +; CHECK-LE-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v8i16_v8i16rhs: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI16_0@toc@ha +; CHECK-BE-P8-NEXT: mtvsrwz v3, r4 +; CHECK-BE-P8-NEXT: addi r5, r5, .LCPI16_0@toc@l +; CHECK-BE-P8-NEXT: mtvsrwz v4, r3 +; CHECK-BE-P8-NEXT: lxvw4x v2, 0, r5 +; CHECK-BE-P8-NEXT: vperm v2, v4, v3, v2 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v8i16_v8i16rhs: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: addis r5, r2, .LCPI16_0@toc@ha +; CHECK-BE-P9-NEXT: mtvsrwz v3, r4 +; CHECK-BE-P9-NEXT: mtvsrwz v4, r3 +; CHECK-BE-P9-NEXT: addi r5, r5, .LCPI16_0@toc@l +; CHECK-BE-P9-NEXT: lxv v2, 0(r5) +; CHECK-BE-P9-NEXT: vperm v2, v4, v3, v2 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v8i16_v8i16rhs: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r5, L..C8(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r4 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3 +; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r5 +; CHECK-AIX-64-P8-NEXT: 
vperm v2, v4, v2, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v8i16_v8i16rhs: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: ld r5, L..C4(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: mtvsrwz v3, r4 +; CHECK-AIX-64-P9-NEXT: mtvsrwz v4, r3 +; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r5) +; CHECK-AIX-64-P9-NEXT: vperm v2, v4, v3, v2 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v8i16_v8i16rhs: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: sth r4, -16(r1) +; CHECK-AIX-32-P8-NEXT: sth r3, -32(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v8i16_v8i16rhs: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: sth r4, -16(r1) +; CHECK-AIX-32-P9-NEXT: sth r3, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, -32(r1) +; CHECK-AIX-32-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %rhs.tmp = insertelement <8 x i16> undef, i16 %arg1, i32 0 + %rhs = bitcast <8 x i16> %rhs.tmp to <16 x i8> + %lhs.tmp = insertelement <8 x i16> undef, i16 %arg, i32 0 + %lhs = bitcast <8 x i16> %lhs.tmp to <16 x i8> + %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v8i16_v4i32(<8 x i16> %a, <4 x i32> %b, i16 %arg, i32 %arg1) { +; CHECK-LE-P8-LABEL: test_v8i16_v4i32: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r7 +; CHECK-LE-P8-NEXT: mtfprd f1, r8 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: xxswapd v3, vs1 +; CHECK-LE-P8-NEXT: vmrglb v2, v3, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v8i16_v4i32: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r7 +; CHECK-LE-P9-NEXT: mtvsrws v3, r8 +; CHECK-LE-P9-NEXT: xxswapd v2, vs0 
+; CHECK-LE-P9-NEXT: vmrglb v2, v3, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v8i16_v4i32: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: sldi r3, r7, 48 +; CHECK-BE-P8-NEXT: sldi r4, r8, 32 +; CHECK-BE-P8-NEXT: mtvsrd v2, r3 +; CHECK-BE-P8-NEXT: mtvsrd v3, r4 +; CHECK-BE-P8-NEXT: vmrghb v2, v2, v3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v8i16_v4i32: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: sldi r3, r7, 48 +; CHECK-BE-P9-NEXT: mtvsrws v3, r8 +; CHECK-BE-P9-NEXT: mtvsrd v2, r3 +; CHECK-BE-P9-NEXT: vmrghb v2, v2, v3 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v8i16_v4i32: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48 +; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 32 +; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r3 +; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r4 +; CHECK-AIX-64-P8-NEXT: vmrghb v2, v2, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v8i16_v4i32: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 48 +; CHECK-AIX-64-P9-NEXT: mtvsrws v3, r4 +; CHECK-AIX-64-P9-NEXT: mtvsrd v2, r3 +; CHECK-AIX-64-P9-NEXT: vmrghb v2, v2, v3 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r5 +; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: vmrghb v2, v2, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v8i16_v4i32: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, -32(r1) +; CHECK-AIX-32-P9-NEXT: vmrghb v2, v2, v3 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <8 x i16> %a, i16 %arg, i32 0 + %lhs = 
bitcast <8 x i16> %lhs.tmp to <16 x i8> + %rhs.tmp = insertelement <4 x i32> %b, i32 %arg1, i32 0 + %rhs = bitcast <4 x i32> %rhs.tmp to <16 x i8> + %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v8i16_v2i64(<8 x i16> %a, <2 x i64> %b, i16 %arg, i64 %arg1) { +; CHECK-LE-P8-LABEL: test_v8i16_v2i64: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r7 +; CHECK-LE-P8-NEXT: mtfprd f1, r8 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: xxswapd v3, vs1 +; CHECK-LE-P8-NEXT: vmrglb v2, v3, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v8i16_v2i64: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r7 +; CHECK-LE-P9-NEXT: xxswapd v2, vs0 +; CHECK-LE-P9-NEXT: mtfprd f0, r8 +; CHECK-LE-P9-NEXT: xxswapd v3, vs0 +; CHECK-LE-P9-NEXT: vmrglb v2, v3, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v8i16_v2i64: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: sldi r3, r7, 48 +; CHECK-BE-P8-NEXT: mtvsrd v3, r8 +; CHECK-BE-P8-NEXT: mtvsrd v2, r3 +; CHECK-BE-P8-NEXT: vmrghb v2, v2, v3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v8i16_v2i64: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: sldi r3, r7, 48 +; CHECK-BE-P9-NEXT: mtvsrd v3, r8 +; CHECK-BE-P9-NEXT: mtvsrd v2, r3 +; CHECK-BE-P9-NEXT: vmrghb v2, v2, v3 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v8i16_v2i64: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48 +; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r4 +; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r3 +; CHECK-AIX-64-P8-NEXT: vmrghb v2, v2, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v8i16_v2i64: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 48 +; CHECK-AIX-64-P9-NEXT: mtvsrd v3, r4 +; CHECK-AIX-64-P9-NEXT: mtvsrd v2, r3 +; CHECK-AIX-64-P9-NEXT: vmrghb v2, v2, v3 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v8i16_v2i64: +; 
CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r5 +; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: vmrghb v2, v2, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v8i16_v2i64: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, -32(r1) +; CHECK-AIX-32-P9-NEXT: vmrghb v2, v2, v3 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <8 x i16> %a, i16 %arg, i32 0 + %lhs = bitcast <8 x i16> %lhs.tmp to <16 x i8> + %rhs.tmp = insertelement <2 x i64> %b, i64 %arg1, i32 0 + %rhs = bitcast <2 x i64> %rhs.tmp to <16 x i8> + %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v4i32_v4i32(i32 %arg, i32 %arg1, <4 x i32> %a, <4 x i32> %b) { +; CHECK-LE-P8-LABEL: test_v4i32_v4i32: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprwz f0, r3 +; CHECK-LE-P8-NEXT: mtfprwz f1, r4 +; CHECK-LE-P8-NEXT: xxmrghw v2, vs1, vs0 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v4i32_v4i32: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprwz f0, r3 +; CHECK-LE-P9-NEXT: mtfprwz f1, r4 +; CHECK-LE-P9-NEXT: xxmrghw v2, vs1, vs0 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v4i32_v4i32: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: mtvsrwz v2, r4 +; CHECK-BE-P8-NEXT: mtvsrwz v3, r3 +; CHECK-BE-P8-NEXT: vmrgow v2, v3, v2 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v4i32_v4i32: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: mtvsrwz v2, r4 +; CHECK-BE-P9-NEXT: mtvsrwz v3, r3 +; CHECK-BE-P9-NEXT: vmrgow v2, v3, v2 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v4i32_v4i32: +; 
CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r4 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v3, r3 +; CHECK-AIX-64-P8-NEXT: vmrgow v2, v3, v2 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v4i32_v4i32: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: mtvsrwz v2, r4 +; CHECK-AIX-64-P9-NEXT: mtvsrwz v3, r3 +; CHECK-AIX-64-P9-NEXT: vmrgow v2, v3, v2 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v4i32_v4i32: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: stw r4, -16(r1) +; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r4 +; CHECK-AIX-32-P8-NEXT: xxmrghw v2, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v4i32_v4i32: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stw r4, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghw v2, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <4 x i32> %a, i32 %arg, i32 0 + %lhs = bitcast <4 x i32> %lhs.tmp to <16 x i8> + %rhs.tmp = insertelement <4 x i32> %b, i32 %arg1, i32 0 + %rhs = bitcast <4 x i32> %rhs.tmp to <16 x i8> + %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v4i32_v8i16(i32 %arg, i16 %arg1) { +; CHECK-LE-P8-LABEL: test_v4i32_v8i16: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: xxswapd v3, vs1 +; CHECK-LE-P8-NEXT: vmrglh v2, v3, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v4i32_v8i16: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r4 +; CHECK-LE-P9-NEXT: mtvsrws v2, r3 +; CHECK-LE-P9-NEXT: 
xxswapd v3, vs0 +; CHECK-LE-P9-NEXT: vmrglh v2, v3, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v4i32_v8i16: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: sldi r3, r3, 32 +; CHECK-BE-P8-NEXT: sldi r4, r4, 48 +; CHECK-BE-P8-NEXT: mtvsrd v2, r3 +; CHECK-BE-P8-NEXT: mtvsrd v3, r4 +; CHECK-BE-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v4i32_v8i16: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: mtvsrws v2, r3 +; CHECK-BE-P9-NEXT: sldi r3, r4, 48 +; CHECK-BE-P9-NEXT: mtvsrd v3, r3 +; CHECK-BE-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v4i32_v8i16: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 32 +; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 48 +; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r3 +; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r4 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v4i32_v8i16: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: mtvsrws v2, r3 +; CHECK-AIX-64-P9-NEXT: sldi r3, r4, 48 +; CHECK-AIX-64-P9-NEXT: mtvsrd v3, r3 +; CHECK-AIX-64-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r5 +; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v4i32_v8i16: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, -32(r1) +; CHECK-AIX-32-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <4 x i32> undef, i32 %arg, 
i32 0 + %lhs = bitcast <4 x i32> %lhs.tmp to <16 x i8> + %rhs.tmp = insertelement <8 x i16> undef, i16 %arg1, i32 0 + %rhs = bitcast <8 x i16> %rhs.tmp to <16 x i8> + %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v2i64_v2i64(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) { +; CHECK-LE-P8-LABEL: test_v2i64_v2i64: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lxsdx v2, 0, r3 +; CHECK-LE-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-LE-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v2i64_v2i64: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsd v2, 0(r3) +; CHECK-LE-P9-NEXT: lxsd v3, 0(r4) +; CHECK-LE-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v2i64_v2i64: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lxsdx v2, 0, r3 +; CHECK-BE-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-BE-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v2i64_v2i64: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsd v2, 0(r3) +; CHECK-BE-P9-NEXT: lxsd v3, 0(r4) +; CHECK-BE-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v2i64_v2i64: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lxsdx v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v2i64_v2i64: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsd v2, 0(r3) +; CHECK-AIX-64-P9-NEXT: lxsd v3, 0(r4) +; CHECK-AIX-64-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v2i64_v2i64: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r5, L..C8(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: xxspltw v2, vs0, 1 +; CHECK-AIX-32-P8-NEXT: 
lxvw4x v4, 0, r5 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v2i64_v2i64: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxvwsx v2, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C4(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <8 x i8>, ptr %a, align 4 + %bc1 = bitcast <8 x i8> %0 to i64 + %vecinit3 = insertelement <2 x i64> poison, i64 %bc1, i64 0 + %1 = load <2 x i8>, ptr %b, align 8 + %bc2 = bitcast <2 x i8> %1 to i16 + %vecinit6 = insertelement <8 x i16> undef, i16 %bc2, i64 0 + %2 = bitcast <2 x i64> %vecinit3 to <16 x i8> + %3 = bitcast <8 x i16> %vecinit6 to <16 x i8> + %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v2i64_v4i32(i64 %arg, i32 %arg1, <2 x i64> %a, <4 x i32> %b) { +; CHECK-LE-P8-LABEL: test_v2i64_v4i32: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: xxswapd vs1, vs1 +; CHECK-LE-P8-NEXT: xxmrglw v2, vs1, vs0 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v2i64_v4i32: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: mtvsrws vs1, r4 +; CHECK-LE-P9-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P9-NEXT: xxmrglw v2, vs1, vs0 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v2i64_v4i32: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: mtfprd f0, r3 +; CHECK-BE-P8-NEXT: sldi r3, r4, 32 +; CHECK-BE-P8-NEXT: mtfprd f1, r3 +; CHECK-BE-P8-NEXT: xxmrghw v2, vs0, vs1 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v2i64_v4i32: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: mtvsrws vs1, r4 +; CHECK-BE-P9-NEXT: mtfprd f0, r3 +; CHECK-BE-P9-NEXT: xxmrghw v2, vs0, vs1 +; CHECK-BE-P9-NEXT: blr +; 
+; CHECK-AIX-64-P8-LABEL: test_v2i64_v4i32: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 32 +; CHECK-AIX-64-P8-NEXT: mtfprd f1, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghw v2, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v2i64_v4i32: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: mtvsrws vs1, r4 +; CHECK-AIX-64-P9-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P9-NEXT: xxmrghw v2, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v2i64_v4i32: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: stw r5, -16(r1) +; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r4 +; CHECK-AIX-32-P8-NEXT: xxmrghw v2, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v2i64_v4i32: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stw r5, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghw v2, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <2 x i64> %a, i64 %arg, i32 0 + %lhs = bitcast <2 x i64> %lhs.tmp to <16 x i8> + %rhs.tmp = insertelement <4 x i32> %b, i32 %arg1, i32 0 + %rhs = bitcast <4 x i32> %rhs.tmp to <16 x i8> + %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v2i64_v8i16(i64 %arg, i16 %arg1) { +; CHECK-LE-P8-LABEL: test_v2i64_v8i16: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: xxswapd v3, vs1 +; CHECK-LE-P8-NEXT: vmrglh v2, v3, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v2i64_v8i16: +; CHECK-LE-P9: # %bb.0: # %entry +; 
CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: xxswapd v2, vs0 +; CHECK-LE-P9-NEXT: mtfprd f0, r4 +; CHECK-LE-P9-NEXT: xxswapd v3, vs0 +; CHECK-LE-P9-NEXT: vmrglh v2, v3, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v2i64_v8i16: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: mtvsrd v2, r3 +; CHECK-BE-P8-NEXT: sldi r3, r4, 48 +; CHECK-BE-P8-NEXT: mtvsrd v3, r3 +; CHECK-BE-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v2i64_v8i16: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: mtvsrd v2, r3 +; CHECK-BE-P9-NEXT: sldi r3, r4, 48 +; CHECK-BE-P9-NEXT: mtvsrd v3, r3 +; CHECK-BE-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v2i64_v8i16: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r3 +; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 48 +; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v2i64_v8i16: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: mtvsrd v2, r3 +; CHECK-AIX-64-P9-NEXT: sldi r3, r4, 48 +; CHECK-AIX-64-P9-NEXT: mtvsrd v3, r3 +; CHECK-AIX-64-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v2i64_v8i16: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -16 +; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r4 +; CHECK-AIX-32-P8-NEXT: sth r5, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v2i64_v8i16: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: sth r5, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, -32(r1) +; CHECK-AIX-32-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp 
= insertelement <2 x i64> undef, i64 %arg, i32 0 + %lhs = bitcast <2 x i64> %lhs.tmp to <16 x i8> + %rhs.tmp = insertelement <8 x i16> undef, i16 %arg1, i32 0 + %rhs = bitcast <8 x i16> %rhs.tmp to <16 x i8> + %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v4i32_v2i64(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) { +; CHECK-LE-P8-LABEL: test_v4i32_v2i64: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI24_0@toc@ha +; CHECK-LE-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-LE-P8-NEXT: lfdx f1, 0, r4 +; CHECK-LE-P8-NEXT: addi r3, r5, .LCPI24_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs2, 0, r3 +; CHECK-LE-P8-NEXT: xxswapd v2, f0 +; CHECK-LE-P8-NEXT: xxswapd v3, f1 +; CHECK-LE-P8-NEXT: xxswapd v4, vs2 +; CHECK-LE-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v4i32_v2i64: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r3 +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI24_0@toc@ha +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI24_0@toc@l +; CHECK-LE-P9-NEXT: xxswapd v2, f0 +; CHECK-LE-P9-NEXT: lfd f0, 0(r4) +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: xxswapd v3, f0 +; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v4i32_v2i64: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI24_0@toc@ha +; CHECK-BE-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-BE-P8-NEXT: addi r3, r5, .LCPI24_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-BE-P8-NEXT: xxsldwi v2, f0, f0, 1 +; CHECK-BE-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v4i32_v2i64: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lfiwzx f0, 0, r3 +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI24_0@toc@ha +; CHECK-BE-P9-NEXT: lxsd v3, 0(r4) +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI24_0@toc@l +; CHECK-BE-P9-NEXT: lxv v4, 0(r3) +; 
CHECK-BE-P9-NEXT: xxsldwi v2, f0, f0, 1 +; CHECK-BE-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v4i32_v2i64: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r5, L..C9(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-AIX-64-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: xxsldwi v2, f0, f0, 1 +; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v4i32_v2i64: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r3 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C5(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: lxsd v3, 0(r4) +; CHECK-AIX-64-P9-NEXT: xxsldwi v2, f0, f0, 1 +; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-64-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v4i32_v2i64: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r5, 4(r4) +; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: stw r5, -16(r1) +; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r4) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C9(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r4 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxmrghw v3, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v4i32_v2i64: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, 4(r4) +; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r4) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C5(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: 
lxv v4, 0(r3) +; CHECK-AIX-32-P9-NEXT: xxmrghw v3, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <4 x i8>, ptr %a, align 4 + %bc1 = bitcast <4 x i8> %0 to i32 + %vecinit3 = insertelement <4 x i32> poison, i32 %bc1, i64 0 + %1 = load <8 x i8>, ptr %b, align 8 + %bc2 = bitcast <8 x i8> %1 to i64 + %vecinit6 = insertelement <2 x i64> undef, i64 %bc2, i64 0 + %2 = bitcast <4 x i32> %vecinit3 to <16 x i8> + %3 = bitcast <2 x i64> %vecinit6 to <16 x i8> + %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> + ret <16 x i8> %shuffle +} diff --git a/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll @@ -0,0 +1,1909 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-LE-P8 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-LE-P9 +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-BE-P8 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-BE-P9 + +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P8 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: 
-ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P9 +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P8 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P9 + +define <2 x i64> @test_v16i8_v16i8(i8 %arg1, i8 %arg) { +; CHECK-LE-P8-LABEL: test_v16i8_v16i8: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v16i8_v16i8: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: mtfprd f1, r4 +; CHECK-LE-P9-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v16i8_v16i8: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: mtfprwz f0, r3 +; CHECK-BE-P8-NEXT: mtfprwz f1, r4 +; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v16i8_v16i8: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: mtfprwz f0, r3 +; CHECK-BE-P9-NEXT: mtfprwz f1, r4 +; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v16i8_v16i8: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P8-NEXT: mtfprwz f1, r4 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v16i8_v16i8: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P9-NEXT: mtfprwz f1, r4 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v16i8_v16i8: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, 
-16 +; CHECK-AIX-32-P8-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5 +; CHECK-AIX-32-P8-NEXT: stb r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v16i8_v16i8: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: stb r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <16 x i8> undef, i8 %arg1, i32 0 + %lhs = bitcast <16 x i8> %lhs.tmp to <2 x i64> + %rhs.tmp = insertelement <16 x i8> undef, i8 %arg, i32 0 + %rhs = bitcast <16 x i8> %rhs.tmp to <2 x i64> + %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_none_v16i8(i8 %arg1, ptr nocapture noundef readonly %b) { +; CHECK-LE-P8-LABEL: test_none_v16i8: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_none_v16i8: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxv v2, 0(r4) +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_none_v16i8: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-BE-P8-NEXT: mtfprwz f0, r3 +; CHECK-BE-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_none_v16i8: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxv v2, 0(r4) +; CHECK-BE-P9-NEXT: mtfprwz f0, r3 +; CHECK-BE-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_none_v16i8: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; 
CHECK-AIX-64-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_none_v16i8: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4) +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_none_v16i8: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5 +; CHECK-AIX-32-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_none_v16i8: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, 0(r4) +; CHECK-AIX-32-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <16 x i8> undef, i8 %arg1, i32 0 + %lhs = bitcast <16 x i8> %lhs.tmp to <2 x i64> + %rhs = load <2 x i64>, ptr %b, align 4 + %shuffle = shufflevector <2 x i64> %rhs, <2 x i64> %lhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_v16i8_none(i8 %arg1, ptr nocapture noundef readonly %b) { +; CHECK-LE-P8-LABEL: test_v16i8_none: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: xxmrgld v2, v2, vs0 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v16i8_none: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxv v2, 0(r4) +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: xxmrgld v2, v2, vs0 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v16i8_none: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-BE-P8-NEXT: mtfprwz f0, r3 +; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, v2 +; 
CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v16i8_none: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxv v2, 0(r4) +; CHECK-BE-P9-NEXT: mtfprwz f0, r3 +; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, v2 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v16i8_none: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, v2 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v16i8_none: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4) +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, v2 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v16i8_none: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5 +; CHECK-AIX-32-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs0, v2 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v16i8_none: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, 0(r4) +; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs0, v2 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <16 x i8> undef, i8 %arg1, i32 0 + %lhs = bitcast <16 x i8> %lhs.tmp to <2 x i64> + %rhs = load <2 x i64>, ptr %b, align 4 + %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_v16i8_v8i16(i8 %arg1, i16 %arg) { +; CHECK-LE-P8-LABEL: test_v16i8_v8i16: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: xxswapd vs1, vs1 +; CHECK-LE-P8-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v16i8_v8i16: +; CHECK-LE-P9: # %bb.0: # %entry +; 
CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: mtfprd f1, r4 +; CHECK-LE-P9-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P9-NEXT: xxswapd vs1, vs1 +; CHECK-LE-P9-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v16i8_v8i16: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: sldi r3, r3, 56 +; CHECK-BE-P8-NEXT: sldi r4, r4, 48 +; CHECK-BE-P8-NEXT: mtfprd f0, r3 +; CHECK-BE-P8-NEXT: mtfprd f1, r4 +; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v16i8_v8i16: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: sldi r3, r3, 56 +; CHECK-BE-P9-NEXT: mtfprd f0, r3 +; CHECK-BE-P9-NEXT: sldi r3, r4, 48 +; CHECK-BE-P9-NEXT: mtfprd f1, r3 +; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v16i8_v8i16: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 56 +; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 48 +; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P8-NEXT: mtfprd f1, r4 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v16i8_v8i16: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 56 +; CHECK-AIX-64-P9-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P9-NEXT: sldi r3, r4, 48 +; CHECK-AIX-64-P9-NEXT: mtfprd f1, r3 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v16i8_v8i16: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5 +; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v16i8_v8i16: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: sth r4, 
-32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <16 x i8> undef, i8 %arg1, i32 0 + %lhs = bitcast <16 x i8> %lhs.tmp to <2 x i64> + %rhs.tmp = insertelement <8 x i16> undef, i16 %arg, i32 0 + %rhs = bitcast <8 x i16> %rhs.tmp to <2 x i64> + %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_v8i16_v16i8(i8 %arg1, i16 %arg) { +; CHECK-LE-P8-LABEL: test_v8i16_v16i8: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: xxswapd vs1, vs1 +; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, vs1 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v8i16_v16i8: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: mtfprd f1, r4 +; CHECK-LE-P9-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P9-NEXT: xxswapd vs1, vs1 +; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, vs1 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v8i16_v16i8: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: sldi r3, r3, 56 +; CHECK-BE-P8-NEXT: sldi r4, r4, 48 +; CHECK-BE-P8-NEXT: mtfprd f0, r3 +; CHECK-BE-P8-NEXT: mtfprd f1, r4 +; CHECK-BE-P8-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v8i16_v16i8: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: sldi r3, r3, 56 +; CHECK-BE-P9-NEXT: mtfprd f0, r3 +; CHECK-BE-P9-NEXT: sldi r3, r4, 48 +; CHECK-BE-P9-NEXT: mtfprd f1, r3 +; CHECK-BE-P9-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v8i16_v16i8: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 56 +; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 48 +; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P8-NEXT: mtfprd f1, r4 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs1, vs0 +; 
CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v8i16_v16i8: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 56 +; CHECK-AIX-64-P9-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P9-NEXT: sldi r3, r4, 48 +; CHECK-AIX-64-P9-NEXT: mtfprd f1, r3 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v8i16_v16i8: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5 +; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v8i16_v16i8: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <16 x i8> undef, i8 %arg1, i32 0 + %lhs = bitcast <16 x i8> %lhs.tmp to <2 x i64> + %rhs.tmp = insertelement <8 x i16> undef, i16 %arg, i32 0 + %rhs = bitcast <8 x i16> %rhs.tmp to <2 x i64> + %shuffle = shufflevector <2 x i64> %rhs, <2 x i64> %lhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_v8i16_none(i16 %arg1, ptr nocapture noundef readonly %b) { +; CHECK-LE-P8-LABEL: test_v8i16_none: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: xxmrgld v2, v2, vs0 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v8i16_none: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxv v2, 0(r4) +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: xxmrgld v2, v2, vs0 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v8i16_none: +; 
CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-BE-P8-NEXT: mtfprwz f0, r3 +; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, v2 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v8i16_none: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxv v2, 0(r4) +; CHECK-BE-P9-NEXT: mtfprwz f0, r3 +; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, v2 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v8i16_none: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, v2 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v8i16_none: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4) +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, v2 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v8i16_none: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5 +; CHECK-AIX-32-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs0, v2 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v8i16_none: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, 0(r4) +; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs0, v2 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <8 x i16> undef, i16 %arg1, i32 0 + %lhs = bitcast <8 x i16> %lhs.tmp to <2 x i64> + %rhs = load <2 x i64>, ptr %b, align 4 + %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_none_v8i16(i16 %arg1, ptr nocapture noundef readonly %b) { +; CHECK-LE-P8-LABEL: test_none_v8i16: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; 
CHECK-LE-P8-NEXT: xxmrgld v2, vs0, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_none_v8i16: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxv v2, 0(r4) +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_none_v8i16: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-BE-P8-NEXT: mtfprwz f0, r3 +; CHECK-BE-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_none_v8i16: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxv v2, 0(r4) +; CHECK-BE-P9-NEXT: mtfprwz f0, r3 +; CHECK-BE-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_none_v8i16: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_none_v8i16: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4) +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_none_v8i16: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5 +; CHECK-AIX-32-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_none_v8i16: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, 0(r4) +; CHECK-AIX-32-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <8 x i16> undef, i16 %arg1, i32 0 + %lhs = bitcast <8 x i16> %lhs.tmp to <2 x i64> + %rhs = load <2 x i64>, ptr %b, align 4 + %shuffle = shufflevector <2 x i64> %rhs, <2 x i64> 
%lhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_v16i8_v4i32(i8 %arg1, i32 %arg) { +; CHECK-LE-P8-LABEL: test_v16i8_v4i32: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: xxswapd vs1, vs1 +; CHECK-LE-P8-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v16i8_v4i32: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: mtvsrws vs1, r4 +; CHECK-LE-P9-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P9-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v16i8_v4i32: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: sldi r3, r3, 56 +; CHECK-BE-P8-NEXT: sldi r4, r4, 32 +; CHECK-BE-P8-NEXT: mtfprd f0, r3 +; CHECK-BE-P8-NEXT: mtfprd f1, r4 +; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v16i8_v4i32: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: sldi r3, r3, 56 +; CHECK-BE-P9-NEXT: mtvsrws vs1, r4 +; CHECK-BE-P9-NEXT: mtfprd f0, r3 +; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v16i8_v4i32: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 56 +; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 32 +; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P8-NEXT: mtfprd f1, r4 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v16i8_v4i32: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 56 +; CHECK-AIX-64-P9-NEXT: mtvsrws vs1, r4 +; CHECK-AIX-64-P9-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v16i8_v4i32: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 +; 
CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5 +; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v16i8_v4i32: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <16 x i8> undef, i8 %arg1, i32 0 + %lhs = bitcast <16 x i8> %lhs.tmp to <2 x i64> + %rhs.tmp = insertelement <4 x i32> undef, i32 %arg, i32 0 + %rhs = bitcast <4 x i32> %rhs.tmp to <2 x i64> + %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_v4i32_v16i8(i8 %arg1, i32 %arg) { +; CHECK-LE-P8-LABEL: test_v4i32_v16i8: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: xxswapd vs1, vs1 +; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, vs1 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v4i32_v16i8: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: mtvsrws vs1, r4 +; CHECK-LE-P9-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, vs1 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v4i32_v16i8: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: sldi r3, r3, 56 +; CHECK-BE-P8-NEXT: sldi r4, r4, 32 +; CHECK-BE-P8-NEXT: mtfprd f0, r3 +; CHECK-BE-P8-NEXT: mtfprd f1, r4 +; CHECK-BE-P8-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v4i32_v16i8: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: sldi r3, r3, 56 +; CHECK-BE-P9-NEXT: mtvsrws vs1, r4 +; CHECK-BE-P9-NEXT: mtfprd f0, r3 +; CHECK-BE-P9-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-BE-P9-NEXT: blr +; +; 
CHECK-AIX-64-P8-LABEL: test_v4i32_v16i8: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 56 +; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 32 +; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P8-NEXT: mtfprd f1, r4 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v4i32_v16i8: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 56 +; CHECK-AIX-64-P9-NEXT: mtvsrws vs1, r4 +; CHECK-AIX-64-P9-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v4i32_v16i8: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5 +; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v4i32_v16i8: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <16 x i8> undef, i8 %arg1, i32 0 + %lhs = bitcast <16 x i8> %lhs.tmp to <2 x i64> + %rhs.tmp = insertelement <4 x i32> undef, i32 %arg, i32 0 + %rhs = bitcast <4 x i32> %rhs.tmp to <2 x i64> + %shuffle = shufflevector <2 x i64> %rhs, <2 x i64> %lhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_none_v4i32(i32 %arg1, ptr nocapture noundef readonly %b) { +; CHECK-LE-P8-LABEL: test_none_v4i32: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: mtfprwz f0, r3 +; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, v2 +; CHECK-LE-P8-NEXT: blr +; +; 
CHECK-LE-P9-LABEL: test_none_v4i32: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxv v2, 0(r4) +; CHECK-LE-P9-NEXT: mtfprwz f0, r3 +; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_none_v4i32: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-BE-P8-NEXT: mtfprwz f0, r3 +; CHECK-BE-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_none_v4i32: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxv v2, 0(r4) +; CHECK-BE-P9-NEXT: mtfprwz f0, r3 +; CHECK-BE-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_none_v4i32: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_none_v4i32: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4) +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_none_v4i32: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5 +; CHECK-AIX-32-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_none_v4i32: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, 0(r4) +; CHECK-AIX-32-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <4 x i32> undef, i32 %arg1, i32 0 + %lhs = bitcast <4 x i32> %lhs.tmp to <2 x i64> + %rhs = load <2 x i64>, ptr %b, align 4 + %shuffle = shufflevector <2 x i64> %rhs, <2 x i64> %lhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> 
@test_v4i32_none(i32 %arg1, ptr nocapture noundef readonly %b) { +; CHECK-LE-P8-LABEL: test_v4i32_none: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: mtfprwz f0, r3 +; CHECK-LE-P8-NEXT: xxmrgld v2, v2, vs0 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v4i32_none: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxv v2, 0(r4) +; CHECK-LE-P9-NEXT: mtfprwz f0, r3 +; CHECK-LE-P9-NEXT: xxmrgld v2, v2, vs0 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v4i32_none: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-BE-P8-NEXT: mtfprwz f0, r3 +; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, v2 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v4i32_none: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxv v2, 0(r4) +; CHECK-BE-P9-NEXT: mtfprwz f0, r3 +; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, v2 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v4i32_none: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, v2 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v4i32_none: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4) +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, v2 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v4i32_none: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5 +; CHECK-AIX-32-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs0, v2 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v4i32_none: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, 0(r4) +; CHECK-AIX-32-P9-NEXT: xxmrghd 
v2, vs0, v2 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <4 x i32> undef, i32 %arg1, i32 0 + %lhs = bitcast <4 x i32> %lhs.tmp to <2 x i64> + %rhs = load <2 x i64>, ptr %b, align 4 + %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_v16i8_v2i64(i8 %arg1, i64 %arg) { +; CHECK-LE-P8-LABEL: test_v16i8_v2i64: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: xxswapd v2, vs1 +; CHECK-LE-P8-NEXT: xxmrgld v2, v2, vs0 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v16i8_v2i64: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: mtfprd f1, r4 +; CHECK-LE-P9-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P9-NEXT: xxswapd v2, vs1 +; CHECK-LE-P9-NEXT: xxmrgld v2, v2, vs0 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v16i8_v2i64: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: sldi r3, r3, 56 +; CHECK-BE-P8-NEXT: mtfprd f1, r4 +; CHECK-BE-P8-NEXT: mtfprd f0, r3 +; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v16i8_v2i64: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: sldi r3, r3, 56 +; CHECK-BE-P9-NEXT: mtfprd f1, r4 +; CHECK-BE-P9-NEXT: mtfprd f0, r3 +; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v16i8_v2i64: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 56 +; CHECK-AIX-64-P8-NEXT: mtfprd f1, r4 +; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v16i8_v2i64: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 56 +; CHECK-AIX-64-P9-NEXT: mtfprd f1, r4 +; CHECK-AIX-64-P9-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: blr +; +; 
CHECK-AIX-32-P8-LABEL: test_v16i8_v2i64: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C0(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: addi r6, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r6 +; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: stw r5, -48(r1) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r4 +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C1(r2) # %const.1 +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -48 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r4 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v16i8_v2i64: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: mtfprwz f0, r4 +; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1) +; CHECK-AIX-32-P9-NEXT: xxinsertw v2, vs0, 8 +; CHECK-AIX-32-P9-NEXT: mtfprwz f0, r5 +; CHECK-AIX-32-P9-NEXT: xxinsertw v2, vs0, 12 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <16 x i8> undef, i8 %arg1, i32 0 + %lhs = bitcast <16 x i8> %lhs.tmp to <2 x i64> + %rhs.tmp = insertelement <2 x i64> undef, i64 %arg, i32 0 + %rhs = bitcast <2 x i64> %rhs.tmp to <2 x i64> + %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_v2i64_v16i8(i8 %arg1, i64 %arg) { +; CHECK-LE-P8-LABEL: test_v2i64_v16i8: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: xxswapd v2, vs1 +; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v2i64_v16i8: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: mtfprd f1, r4 +; CHECK-LE-P9-NEXT: xxswapd vs0, vs0 +; 
CHECK-LE-P9-NEXT: xxswapd v2, vs1 +; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v2i64_v16i8: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: mtfprd f0, r4 +; CHECK-BE-P8-NEXT: xxspltd v2, vs0, 0 +; CHECK-BE-P8-NEXT: mtfprwz f0, r3 +; CHECK-BE-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v2i64_v16i8: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: mtfprwz f0, r3 +; CHECK-BE-P9-NEXT: mtvsrdd v2, r4, r4 +; CHECK-BE-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v2i64_v16i8: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: mtfprd f0, r4 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs0 +; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v2i64_v16i8: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P9-NEXT: mtvsrdd v2, r4, r4 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v2i64_v16i8: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r6, r1, -16 +; CHECK-AIX-32-P8-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r6 +; CHECK-AIX-32-P8-NEXT: stw r5, -32(r1) +; CHECK-AIX-32-P8-NEXT: stw r4, -48(r1) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -48 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs2, 0, r4 +; CHECK-AIX-32-P8-NEXT: xxmrghw vs1, vs2, vs1 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v2i64_v16i8: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stw r5, -32(r1) +; CHECK-AIX-32-P9-NEXT: stw r4, -48(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs2, -48(r1) +; CHECK-AIX-32-P9-NEXT: stb r3, -16(r1) +; 
CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghw vs1, vs2, vs1 +; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <16 x i8> undef, i8 %arg1, i32 0 + %lhs = bitcast <16 x i8> %lhs.tmp to <2 x i64> + %rhs.tmp = insertelement <2 x i64> undef, i64 %arg, i32 0 + %rhs = bitcast <2 x i64> %rhs.tmp to <2 x i64> + %shuffle = shufflevector <2 x i64> %rhs, <2 x i64> %lhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_none_v2i64(ptr nocapture noundef readonly %b, i64 %arg) { +; CHECK-LE-P8-LABEL: test_none_v2i64: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: mtfprd f0, r4 +; CHECK-LE-P8-NEXT: xxpermdi v2, vs0, v2, 1 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_none_v2i64: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxv v2, 0(r3) +; CHECK-LE-P9-NEXT: mtfprd f0, r4 +; CHECK-LE-P9-NEXT: xxpermdi v2, vs0, v2, 1 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_none_v2i64: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lxvd2x v2, 0, r3 +; CHECK-BE-P8-NEXT: mtfprd f0, r4 +; CHECK-BE-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_none_v2i64: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxv v2, 0(r3) +; CHECK-BE-P9-NEXT: mtfprd f0, r4 +; CHECK-BE-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_none_v2i64: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lxvd2x v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: mtfprd f0, r4 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_none_v2i64: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r3) +; CHECK-AIX-64-P9-NEXT: mtfprd f0, r4 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_none_v2i64: +; 
CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r6, L..C2(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: stw r4, -16(r1) +; CHECK-AIX-32-P8-NEXT: stw r5, -32(r1) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r4 +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C3(r2) # %const.1 +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r6 +; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v4, v2 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r4 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_none_v2i64: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxv v2, 0(r3) +; CHECK-AIX-32-P9-NEXT: mtfprwz f0, r4 +; CHECK-AIX-32-P9-NEXT: xxinsertw v2, vs0, 8 +; CHECK-AIX-32-P9-NEXT: mtfprwz f0, r5 +; CHECK-AIX-32-P9-NEXT: xxinsertw v2, vs0, 12 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs = load <2 x i64>, ptr %b, align 4 + %rhs = insertelement <2 x i64> undef, i64 %arg, i32 0 + %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_v2i64_none(ptr nocapture noundef readonly %b, i64 %arg) { +; CHECK-LE-P8-LABEL: test_v2i64_none: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: mtfprd f0, r4 +; CHECK-LE-P8-NEXT: xxpermdi v2, v2, vs0, 2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v2i64_none: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxv v2, 0(r3) +; CHECK-LE-P9-NEXT: mtfprd f0, r4 +; CHECK-LE-P9-NEXT: xxpermdi v2, v2, vs0, 2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v2i64_none: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: mtfprd f0, r4 +; CHECK-BE-P8-NEXT: lxvd2x v2, 0, r3 +; CHECK-BE-P8-NEXT: xxspltd v3, vs0, 0 +; CHECK-BE-P8-NEXT: xxmrghd v2, v3, v2 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: 
test_v2i64_none: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxv v2, 0(r3) +; CHECK-BE-P9-NEXT: mtvsrdd v3, r4, r4 +; CHECK-BE-P9-NEXT: xxmrghd v2, v3, v2 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v2i64_none: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: mtfprd f0, r4 +; CHECK-AIX-64-P8-NEXT: lxvd2x v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghd v3, vs0, vs0 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, v3, v2 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v2i64_none: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r3) +; CHECK-AIX-64-P9-NEXT: mtvsrdd v3, r4, r4 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, v3, v2 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v2i64_none: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lxvd2x v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: stw r5, -16(r1) +; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r4 +; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs0, v2 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v2i64_none: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxv v2, 0(r3) +; CHECK-AIX-32-P9-NEXT: stw r5, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs0, v2 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs = load <2 x i64>, ptr %b, align 4 + %rhs = insertelement <2 x i64> undef, i64 %arg, i32 0 + %shuffle = shufflevector <2 x i64> %rhs, <2 x i64> %lhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_v8i16_v8i16(i16 %arg1, i16 %arg) { +; CHECK-LE-P8-LABEL: test_v8i16_v8i16: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; 
CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v8i16_v8i16: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: mtfprd f1, r4 +; CHECK-LE-P9-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v8i16_v8i16: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: mtfprwz f0, r3 +; CHECK-BE-P8-NEXT: mtfprwz f1, r4 +; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v8i16_v8i16: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: mtfprwz f0, r3 +; CHECK-BE-P9-NEXT: mtfprwz f1, r4 +; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v8i16_v8i16: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P8-NEXT: mtfprwz f1, r4 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v8i16_v8i16: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P9-NEXT: mtfprwz f1, r4 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v8i16_v8i16: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5 +; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v8i16_v8i16: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement 
<8 x i16> undef, i16 %arg1, i32 0 + %lhs = bitcast <8 x i16> %lhs.tmp to <2 x i64> + %rhs.tmp = insertelement <8 x i16> undef, i16 %arg, i32 0 + %rhs = bitcast <8 x i16> %rhs.tmp to <2 x i64> + %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_v8i16_v4i32(i16 %arg1, i32 %arg) { +; CHECK-LE-P8-LABEL: test_v8i16_v4i32: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: xxswapd vs1, vs1 +; CHECK-LE-P8-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v8i16_v4i32: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: mtvsrws vs1, r4 +; CHECK-LE-P9-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P9-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v8i16_v4i32: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: sldi r3, r3, 48 +; CHECK-BE-P8-NEXT: sldi r4, r4, 32 +; CHECK-BE-P8-NEXT: mtfprd f0, r3 +; CHECK-BE-P8-NEXT: mtfprd f1, r4 +; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v8i16_v4i32: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: sldi r3, r3, 48 +; CHECK-BE-P9-NEXT: mtvsrws vs1, r4 +; CHECK-BE-P9-NEXT: mtfprd f0, r3 +; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v8i16_v4i32: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48 +; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 32 +; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P8-NEXT: mtfprd f1, r4 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v8i16_v4i32: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 48 +; CHECK-AIX-64-P9-NEXT: mtvsrws vs1, r4 +; CHECK-AIX-64-P9-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, 
vs1 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5 +; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v8i16_v4i32: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <8 x i16> undef, i16 %arg1, i32 0 + %lhs = bitcast <8 x i16> %lhs.tmp to <2 x i64> + %rhs.tmp = insertelement <4 x i32> undef, i32 %arg, i32 0 + %rhs = bitcast <4 x i32> %rhs.tmp to <2 x i64> + %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_v8i16_v2i64(i16 %arg1, i64 %arg) { +; CHECK-LE-P8-LABEL: test_v8i16_v2i64: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: xxswapd v2, vs1 +; CHECK-LE-P8-NEXT: xxmrgld v2, v2, vs0 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v8i16_v2i64: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: mtfprd f1, r4 +; CHECK-LE-P9-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P9-NEXT: xxswapd v2, vs1 +; CHECK-LE-P9-NEXT: xxmrgld v2, v2, vs0 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v8i16_v2i64: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: sldi r3, r3, 48 +; CHECK-BE-P8-NEXT: mtfprd f1, r4 +; CHECK-BE-P8-NEXT: mtfprd f0, r3 +; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P8-NEXT: blr +; +; 
CHECK-BE-P9-LABEL: test_v8i16_v2i64: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: sldi r3, r3, 48 +; CHECK-BE-P9-NEXT: mtfprd f1, r4 +; CHECK-BE-P9-NEXT: mtfprd f0, r3 +; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v8i16_v2i64: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48 +; CHECK-AIX-64-P8-NEXT: mtfprd f1, r4 +; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v8i16_v2i64: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 48 +; CHECK-AIX-64-P9-NEXT: mtfprd f1, r4 +; CHECK-AIX-64-P9-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v8i16_v2i64: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C4(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: addi r6, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r6 +; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: stw r5, -48(r1) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r4 +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C5(r2) # %const.1 +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -48 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r4 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v8i16_v2i64: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: mtfprwz f0, r4 +; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1) +; CHECK-AIX-32-P9-NEXT: xxinsertw v2, vs0, 8 +; CHECK-AIX-32-P9-NEXT: mtfprwz f0, r5 +; CHECK-AIX-32-P9-NEXT: xxinsertw v2, vs0, 12 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <8 x i16> undef, 
i16 %arg1, i32 0 + %lhs = bitcast <8 x i16> %lhs.tmp to <2 x i64> + %rhs.tmp = insertelement <2 x i64> undef, i64 %arg, i32 0 + %rhs = bitcast <2 x i64> %rhs.tmp to <2 x i64> + %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_v4i32_v4i32(i32 %arg1, i32 %arg) { +; CHECK-LE-P8-LABEL: test_v4i32_v4i32: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprwz f0, r3 +; CHECK-LE-P8-NEXT: mtfprwz f1, r4 +; CHECK-LE-P8-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v4i32_v4i32: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprwz f0, r3 +; CHECK-LE-P9-NEXT: mtfprwz f1, r4 +; CHECK-LE-P9-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v4i32_v4i32: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: mtfprwz f0, r3 +; CHECK-BE-P8-NEXT: mtfprwz f1, r4 +; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v4i32_v4i32: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: mtfprwz f0, r3 +; CHECK-BE-P9-NEXT: mtfprwz f1, r4 +; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v4i32_v4i32: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P8-NEXT: mtfprwz f1, r4 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v4i32_v4i32: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P9-NEXT: mtfprwz f1, r4 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v4i32_v4i32: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5 +; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x 
vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v4i32_v4i32: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <4 x i32> undef, i32 %arg1, i32 0 + %lhs = bitcast <4 x i32> %lhs.tmp to <2 x i64> + %rhs.tmp = insertelement <4 x i32> undef, i32 %arg, i32 0 + %rhs = bitcast <4 x i32> %rhs.tmp to <2 x i64> + %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_v4i32_v8i16(i32 %arg1, i16 %arg) { +; CHECK-LE-P8-LABEL: test_v4i32_v8i16: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: xxswapd vs1, vs1 +; CHECK-LE-P8-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v4i32_v8i16: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f1, r4 +; CHECK-LE-P9-NEXT: mtvsrws vs0, r3 +; CHECK-LE-P9-NEXT: xxswapd vs1, vs1 +; CHECK-LE-P9-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v4i32_v8i16: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: sldi r3, r3, 32 +; CHECK-BE-P8-NEXT: sldi r4, r4, 48 +; CHECK-BE-P8-NEXT: mtfprd f0, r3 +; CHECK-BE-P8-NEXT: mtfprd f1, r4 +; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v4i32_v8i16: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: mtvsrws vs0, r3 +; CHECK-BE-P9-NEXT: sldi r3, r4, 48 +; CHECK-BE-P9-NEXT: mtfprd f1, r3 +; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v4i32_v8i16: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: sldi r3, 
r3, 32 +; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 48 +; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P8-NEXT: mtfprd f1, r4 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v4i32_v8i16: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: mtvsrws vs0, r3 +; CHECK-AIX-64-P9-NEXT: sldi r3, r4, 48 +; CHECK-AIX-64-P9-NEXT: mtfprd f1, r3 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5 +; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v4i32_v8i16: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <4 x i32> undef, i32 %arg1, i32 0 + %lhs = bitcast <4 x i32> %lhs.tmp to <2 x i64> + %rhs.tmp = insertelement <8 x i16> undef, i16 %arg, i32 0 + %rhs = bitcast <8 x i16> %rhs.tmp to <2 x i64> + %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_v4i32_v2i64(i32 %arg1, i64 %arg) { +; CHECK-LE-P8-LABEL: test_v4i32_v2i64: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: xxswapd v2, vs1 +; CHECK-LE-P8-NEXT: xxmrgld v2, v2, vs0 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v4i32_v2i64: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f1, 
r4 +; CHECK-LE-P9-NEXT: mtvsrws vs0, r3 +; CHECK-LE-P9-NEXT: xxswapd v2, vs1 +; CHECK-LE-P9-NEXT: xxmrgld v2, v2, vs0 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v4i32_v2i64: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: sldi r3, r3, 32 +; CHECK-BE-P8-NEXT: mtfprd f1, r4 +; CHECK-BE-P8-NEXT: mtfprd f0, r3 +; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v4i32_v2i64: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: mtvsrws vs0, r3 +; CHECK-BE-P9-NEXT: mtfprd f1, r4 +; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v4i32_v2i64: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 32 +; CHECK-AIX-64-P8-NEXT: mtfprd f1, r4 +; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v4i32_v2i64: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: mtvsrws vs0, r3 +; CHECK-AIX-64-P9-NEXT: mtfprd f1, r4 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v4i32_v2i64: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: stw r3, -48(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -48 +; CHECK-AIX-32-P8-NEXT: stw r5, -16(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C6(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r4 +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r4 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxmrghw v2, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v4i32_v2i64: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stw r3, -48(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C0(r2) # %const.0 +; 
CHECK-AIX-32-P9-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -48(r1) +; CHECK-AIX-32-P9-NEXT: stw r5, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v4, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, 0(r3) +; CHECK-AIX-32-P9-NEXT: xxmrghw v2, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v4, v3 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <4 x i32> undef, i32 %arg1, i32 0 + %lhs = bitcast <4 x i32> %lhs.tmp to <2 x i64> + %rhs = insertelement <2 x i64> undef, i64 %arg, i32 0 + %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_v2i64_v2i64(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) { +; CHECK-LE-P8-LABEL: test_v2i64_v2i64: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: ld r3, 0(r3) +; CHECK-LE-P8-NEXT: lfdx f0, 0, r4 +; CHECK-LE-P8-NEXT: mtfprd f1, r3 +; CHECK-LE-P8-NEXT: xxswapd v2, vs1 +; CHECK-LE-P8-NEXT: xxmrghd v3, vs0, vs1 +; CHECK-LE-P8-NEXT: vaddudm v2, v3, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v2i64_v2i64: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: ld r3, 0(r3) +; CHECK-LE-P9-NEXT: lfd f1, 0(r4) +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: xxswapd v2, vs0 +; CHECK-LE-P9-NEXT: xxmrghd v3, vs1, vs0 +; CHECK-LE-P9-NEXT: vaddudm v2, v3, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v2i64_v2i64: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lxsdx v2, 0, r3 +; CHECK-BE-P8-NEXT: lfdx f0, 0, r4 +; CHECK-BE-P8-NEXT: xxmrghd v3, v2, vs0 +; CHECK-BE-P8-NEXT: vaddudm v2, v3, v2 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v2i64_v2i64: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsd v2, 0(r3) +; CHECK-BE-P9-NEXT: lfd f0, 0(r4) +; CHECK-BE-P9-NEXT: xxmrghd v3, v2, vs0 +; CHECK-BE-P9-NEXT: vaddudm v2, v3, v2 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v2i64_v2i64: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; 
CHECK-AIX-64-P8-NEXT: lxsdx v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: lfdx f0, 0, r4 +; CHECK-AIX-64-P8-NEXT: xxmrghd v3, v2, vs0 +; CHECK-AIX-64-P8-NEXT: vaddudm v2, v3, v2 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v2i64_v2i64: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsd v2, 0(r3) +; CHECK-AIX-64-P9-NEXT: lfd f0, 0(r4) +; CHECK-AIX-64-P9-NEXT: xxmrghd v3, v2, vs0 +; CHECK-AIX-64-P9-NEXT: vaddudm v2, v3, v2 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v2i64_v2i64: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r5, 4(r3) +; CHECK-AIX-32-P8-NEXT: addi r6, r1, -32 +; CHECK-AIX-32-P8-NEXT: stw r5, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P8-NEXT: lwz r3, 4(r4) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r6 +; CHECK-AIX-32-P8-NEXT: stw r3, -48(r1) +; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r4) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -64 +; CHECK-AIX-32-P8-NEXT: xxmrghw v2, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: stw r3, -64(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -48 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs2, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs3, 0, r4 +; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs3, vs2 +; CHECK-AIX-32-P8-NEXT: xxmrghd v3, v2, vs0 +; CHECK-AIX-32-P8-NEXT: vaddudm v2, v3, v2 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v2i64_v2i64: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lwz r5, 4(r3) +; CHECK-AIX-32-P9-NEXT: stw r5, -16(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, 4(r4) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -48(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r4) +; CHECK-AIX-32-P9-NEXT: xxmrghw v2, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: lxv vs0, -48(r1) +; CHECK-AIX-32-P9-NEXT: stw 
r3, -64(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -64(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: xxmrghd v3, v2, vs0 +; CHECK-AIX-32-P9-NEXT: vaddudm v2, v3, v2 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <8 x i8>, ptr %a, align 8 + %bc1 = bitcast <8 x i8> %0 to i64 + %vecinit3 = insertelement <2 x i64> poison, i64 %bc1, i64 0 + %1 = load <8 x i8>, ptr %b, align 8 + %bc2 = bitcast <8 x i8> %1 to i64 + %vecinit6 = insertelement <2 x i64> undef, i64 %bc2, i64 0 + %2 = bitcast <2 x i64> %vecinit3 to <2 x i64> + %3 = bitcast <2 x i64> %vecinit6 to <2 x i64> + %shuffle = shufflevector <2 x i64> %2, <2 x i64> %3, <2 x i32> + %4 = add <2 x i64> %shuffle, %2 + ret <2 x i64> %4 +} + +define <2 x i64> @test_v2i64_v4i32(i64 %arg1, i32 %arg) { +; CHECK-LE-P8-LABEL: test_v2i64_v4i32: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs1 +; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v2i64_v4i32: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: xxswapd v2, vs0 +; CHECK-LE-P9-NEXT: mtvsrws vs0, r4 +; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v2i64_v4i32: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: mtfprd f0, r3 +; CHECK-BE-P8-NEXT: xxspltd v2, vs0, 0 +; CHECK-BE-P8-NEXT: mtfprwz f0, r4 +; CHECK-BE-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v2i64_v4i32: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: mtfprwz f0, r4 +; CHECK-BE-P9-NEXT: mtvsrdd v2, r3, r3 +; CHECK-BE-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v2i64_v4i32: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs0 +; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r4 +; 
CHECK-AIX-64-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v2i64_v4i32: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r4 +; CHECK-AIX-64-P9-NEXT: mtvsrdd v2, r3, r3 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v2i64_v4i32: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r6, r1, -48 +; CHECK-AIX-32-P8-NEXT: stw r5, -48(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r6 +; CHECK-AIX-32-P8-NEXT: stw r4, -16(r1) +; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs2, 0, r4 +; CHECK-AIX-32-P8-NEXT: xxmrghw vs1, vs2, vs1 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v2i64_v4i32: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stw r4, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs2, -32(r1) +; CHECK-AIX-32-P9-NEXT: stw r5, -48(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -48(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghw vs1, vs2, vs1 +; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <2 x i64> undef, i64 %arg1, i32 0 + %lhs = bitcast <2 x i64> %lhs.tmp to <2 x i64> + %rhs.tmp = insertelement <4 x i32> undef, i32 %arg, i32 0 + %rhs = bitcast <4 x i32> %rhs.tmp to <2 x i64> + %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_v2i64_v8i16(i64 %arg1, i16 %arg) { +; CHECK-LE-P8-LABEL: test_v2i64_v8i16: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs1 +; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, v2 
+; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v2i64_v8i16: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: xxswapd v2, vs0 +; CHECK-LE-P9-NEXT: mtfprd f0, r4 +; CHECK-LE-P9-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v2i64_v8i16: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: mtfprd f0, r3 +; CHECK-BE-P8-NEXT: xxspltd v2, vs0, 0 +; CHECK-BE-P8-NEXT: mtfprwz f0, r4 +; CHECK-BE-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v2i64_v8i16: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: mtfprwz f0, r4 +; CHECK-BE-P9-NEXT: mtvsrdd v2, r3, r3 +; CHECK-BE-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v2i64_v8i16: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs0 +; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r4 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v2i64_v8i16: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r4 +; CHECK-AIX-64-P9-NEXT: mtvsrdd v2, r3, r3 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v2i64_v8i16: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r6, r1, -48 +; CHECK-AIX-32-P8-NEXT: sth r5, -48(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r6 +; CHECK-AIX-32-P8-NEXT: stw r4, -16(r1) +; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs2, 0, r4 +; CHECK-AIX-32-P8-NEXT: xxmrghw vs1, vs2, vs1 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v2i64_v8i16: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; 
CHECK-AIX-32-P9-NEXT: stw r4, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs2, -32(r1) +; CHECK-AIX-32-P9-NEXT: sth r5, -48(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -48(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghw vs1, vs2, vs1 +; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <2 x i64> undef, i64 %arg1, i32 0 + %lhs = bitcast <2 x i64> %lhs.tmp to <2 x i64> + %rhs.tmp = insertelement <8 x i16> undef, i16 %arg, i32 0 + %rhs = bitcast <8 x i16> %rhs.tmp to <2 x i64> + %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> + ret <2 x i64> %shuffle +} + diff --git a/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll @@ -0,0 +1,1445 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-LE-P8 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-LE-P9 +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-BE-P8 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-BE-P9 + +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P8 +; 
RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P9 +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P8 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P9 + +define void @test_none_v8i16(ptr %a) { +; CHECK-LE-P8-LABEL: test_none_v8i16: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r4, r2, .LCPI0_0@toc@ha +; CHECK-LE-P8-NEXT: lxsdx v2, 0, r3 +; CHECK-LE-P8-NEXT: addi r4, r4, .LCPI0_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: lhz r4, 0(r3) +; CHECK-LE-P8-NEXT: mtvsrd v4, r4 +; CHECK-LE-P8-NEXT: xxswapd v3, vs0 +; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P8-NEXT: xxswapd vs0, v2 +; CHECK-LE-P8-NEXT: stfdx f0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_none_v8i16: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-LE-P9-NEXT: lxsd v3, 0(r3) +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-LE-P9-NEXT: xxswapd vs0, v2 +; CHECK-LE-P9-NEXT: stfd f0, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_none_v8i16: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lhz r4, 0(r3) +; CHECK-BE-P8-NEXT: lfdx f0, 0, r3 +; CHECK-BE-P8-NEXT: sldi r3, r4, 48 +; CHECK-BE-P8-NEXT: mtfprd f1, r3 +; CHECK-BE-P8-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-BE-P8-NEXT: stfdx f0, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_none_v8i16: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-BE-P9-NEXT: lfd f0, 0(r3) +; 
CHECK-BE-P9-NEXT: vsplth v2, v2, 3 +; CHECK-BE-P9-NEXT: xxmrghw vs0, vs0, v2 +; CHECK-BE-P9-NEXT: stfd f0, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_none_v8i16: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lhz r4, 0(r3) +; CHECK-AIX-64-P8-NEXT: lfdx f0, 0, r3 +; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 48 +; CHECK-AIX-64-P8-NEXT: mtfprd f1, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: stfdx f0, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_none_v8i16: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-AIX-64-P9-NEXT: lfd f0, 0(r3) +; CHECK-AIX-64-P9-NEXT: vsplth v2, v2, 3 +; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, vs0, v2 +; CHECK-AIX-64-P9-NEXT: stfd f0, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_none_v8i16: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3) +; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r4 +; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -16 +; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r4 +; CHECK-AIX-32-P8-NEXT: stw r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: lwz r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: stw r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_none_v8i16: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lhz r4, 0(r3) +; CHECK-AIX-32-P9-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -32(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P9-NEXT: stw r3, 0(r3) +; CHECK-AIX-32-P9-NEXT: stxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i8>, ptr undef, align 1 + %tmp0_1 = bitcast <2 x i8> %0 to i16 + %tmp0_2 = insertelement <8 x i16> undef, i16 %tmp0_1, i32 0 + %tmp0_3 = bitcast <8 x i16> %tmp0_2 to <4 x i32> + %1 = load <2 x 
i32>, ptr %a + %tmp1_1 = shufflevector <2 x i32> %1, <2 x i32> undef, <4 x i32> + %2 = shufflevector <4 x i32> %tmp0_3, <4 x i32> %tmp1_1, <2 x i32> + store <2 x i32> %2, ptr undef, align 4 + ret void +} + +define void @test_v8i16_none(ptr %a) { +; CHECK-LE-P8-LABEL: test_v8i16_none: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: lhz r3, 0(r3) +; CHECK-LE-P8-NEXT: mtfprd f1, r3 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: xxmrglw vs0, vs0, vs1 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v8i16_none: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsihzx f0, 0, r3 +; CHECK-LE-P9-NEXT: lxv vs1, 0(r3) +; CHECK-LE-P9-NEXT: xxmrglw vs0, vs1, vs0 +; CHECK-LE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v8i16_none: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lhz r4, 0(r3) +; CHECK-BE-P8-NEXT: lxvw4x vs0, 0, r3 +; CHECK-BE-P8-NEXT: mtfprwz f1, r4 +; CHECK-BE-P8-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v8i16_none: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsihzx f0, 0, r3 +; CHECK-BE-P9-NEXT: lxv vs1, 0(r3) +; CHECK-BE-P9-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-BE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v8i16_none: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lhz r4, 0(r3) +; CHECK-AIX-64-P8-NEXT: lxvw4x vs0, 0, r3 +; CHECK-AIX-64-P8-NEXT: mtfprwz f1, r4 +; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v8i16_none: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsihzx f0, 0, r3 +; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3) +; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3) 
+; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v8i16_none: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: mtfprwz f1, r4 +; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v8i16_none: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxsihzx f0, 0, r3 +; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r3) +; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i8>, ptr undef, align 1 + %tmp0_1 = bitcast <2 x i8> %0 to i16 + %tmp0_2 = insertelement <8 x i16> undef, i16 %tmp0_1, i32 0 + %tmp0_3 = bitcast <8 x i16> %tmp0_2 to <4 x i32> + %1 = load <4 x i32>, ptr %a, align 1 + %2 = shufflevector <4 x i32> %tmp0_3, <4 x i32> %1, <4 x i32> + store <4 x i32> %2, ptr undef, align 4 + ret void +} + +define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) { +; CHECK-LE-P8-LABEL: test_none_v4i32: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; CHECK-LE-P8-NEXT: xxswapd vs0, v2 +; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI2_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs1, 0, r3 +; CHECK-LE-P8-NEXT: mffprwz r3, f0 +; CHECK-LE-P8-NEXT: xxswapd v3, vs1 +; CHECK-LE-P8-NEXT: mtvsrwz v4, r3 +; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P8-NEXT: xxswapd vs0, v2 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r5 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_none_v4i32: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: li r3, 0 +; CHECK-LE-P9-NEXT: vextuwrx r3, r3, v2 +; CHECK-LE-P9-NEXT: mtvsrwz v3, r3 +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI2_0@toc@l +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-LE-P9-NEXT: stxv v2, 0(r5) +; CHECK-LE-P9-NEXT: blr +; +; 
CHECK-BE-P8-LABEL: test_none_v4i32: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI2_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-BE-P8-NEXT: mffprwz r4, f0 +; CHECK-BE-P8-NEXT: mtvsrwz v4, r4 +; CHECK-BE-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-BE-P8-NEXT: stxvw4x v2, 0, r5 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_none_v4i32: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: li r3, 0 +; CHECK-BE-P9-NEXT: vextuwlx r3, r3, v2 +; CHECK-BE-P9-NEXT: mtvsrwz v3, r3 +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI2_0@toc@l +; CHECK-BE-P9-NEXT: lxv v4, 0(r3) +; CHECK-BE-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-BE-P9-NEXT: stxv v2, 0(r5) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_none_v4i32: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-AIX-64-P8-NEXT: ld r4, L..C0(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: mffprwz r5, f0 +; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r5 +; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_none_v4i32: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: li r4, 0 +; CHECK-AIX-64-P9-NEXT: vextuwlx r4, r4, v2 +; CHECK-AIX-64-P9-NEXT: mtvsrwz v3, r4 +; CHECK-AIX-64-P9-NEXT: ld r4, L..C0(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r4) +; CHECK-AIX-64-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_none_v4i32: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r4, L..C0(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r5 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r5 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r4 +; 
CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_none_v4i32: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: addi r4, r1, -16 +; CHECK-AIX-32-P9-NEXT: stxv v2, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P9-NEXT: lwz r4, L..C0(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r4) +; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P9-NEXT: stxv v2, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = extractelement <2 x i32> %vec, i64 0 + %1 = bitcast i32 %0 to <2 x i16> + %2 = shufflevector <2 x i16> %1, <2 x i16> %1, <8 x i32> + %3 = shufflevector <2 x i32> %vec, <2 x i32> %vec, <4 x i32> + %4 = bitcast <4 x i32> %3 to <8 x i16> + %5 = shufflevector <8 x i16> %4, <8 x i16> %2, <8 x i32> + store <8 x i16> %5, ptr %ptr1, align 16 + ret void +} + +define void @test_v4i32_none(<2 x i32> %vec, ptr %ptr1) { +; CHECK-LE-P8-LABEL: test_v4i32_none: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI3_0@toc@ha +; CHECK-LE-P8-NEXT: xxswapd vs0, v2 +; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI3_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs1, 0, r3 +; CHECK-LE-P8-NEXT: mffprwz r3, f0 +; CHECK-LE-P8-NEXT: xxswapd v3, vs1 +; CHECK-LE-P8-NEXT: mtvsrwz v4, r3 +; CHECK-LE-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-LE-P8-NEXT: xxswapd vs0, v2 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r5 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v4i32_none: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: li r3, 0 +; CHECK-LE-P9-NEXT: vextuwrx r3, r3, v2 +; CHECK-LE-P9-NEXT: mtvsrwz v3, r3 +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI3_0@toc@ha +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI3_0@toc@l +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-LE-P9-NEXT: stxv v2, 0(r5) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v4i32_none: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: xxsldwi vs0, v2, 
v2, 3 +; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI3_0@toc@ha +; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI3_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-BE-P8-NEXT: mffprwz r4, f0 +; CHECK-BE-P8-NEXT: mtvsrwz v4, r4 +; CHECK-BE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-BE-P8-NEXT: stxvw4x v2, 0, r5 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v4i32_none: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: li r3, 0 +; CHECK-BE-P9-NEXT: vextuwlx r3, r3, v2 +; CHECK-BE-P9-NEXT: mtvsrwz v3, r3 +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI3_0@toc@ha +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI3_0@toc@l +; CHECK-BE-P9-NEXT: lxv v4, 0(r3) +; CHECK-BE-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-BE-P9-NEXT: stxv v2, 0(r5) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v4i32_none: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-AIX-64-P8-NEXT: ld r4, L..C1(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: mffprwz r5, f0 +; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r5 +; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v4i32_none: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: li r4, 0 +; CHECK-AIX-64-P9-NEXT: vextuwlx r4, r4, v2 +; CHECK-AIX-64-P9-NEXT: mtvsrwz v3, r4 +; CHECK-AIX-64-P9-NEXT: ld r4, L..C1(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r4) +; CHECK-AIX-64-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v4i32_none: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r4, L..C1(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r5 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r5 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r4 +; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: 
blr +; +; CHECK-AIX-32-P9-LABEL: test_v4i32_none: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: addi r4, r1, -16 +; CHECK-AIX-32-P9-NEXT: stxv v2, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P9-NEXT: lwz r4, L..C1(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r4) +; CHECK-AIX-32-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-32-P9-NEXT: stxv v2, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = extractelement <2 x i32> %vec, i64 0 + %1 = bitcast i32 %0 to <2 x i16> + %2 = shufflevector <2 x i16> %1, <2 x i16> %1, <8 x i32> + %3 = shufflevector <2 x i32> %vec, <2 x i32> %vec, <4 x i32> + %4 = bitcast <4 x i32> %3 to <8 x i16> + %5 = shufflevector <8 x i16> %2, <8 x i16> %4, <8 x i32> + store <8 x i16> %5, ptr %ptr1, align 16 + ret void +} + +define void @test_none_v2i64(ptr %ptr, i32 %v1, <2 x i32> %vec) local_unnamed_addr #0 { +; CHECK-LE-P8-LABEL: test_none_v2i64: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI4_0@toc@ha +; CHECK-LE-P8-NEXT: mtvsrwz v4, r4 +; CHECK-LE-P8-NEXT: addis r4, r2, .LCPI4_1@toc@ha +; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI4_0@toc@l +; CHECK-LE-P8-NEXT: addi r4, r4, .LCPI4_1@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5 +; CHECK-LE-P8-NEXT: xxswapd v3, vs0 +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-LE-P8-NEXT: lxsdx v3, 0, r3 +; CHECK-LE-P8-NEXT: xxswapd v4, vs0 +; CHECK-LE-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-LE-P8-NEXT: xxswapd vs0, v2 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 +; +; CHECK-LE-P9-LABEL: test_none_v2i64: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsd v3, 0(r3) +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI4_0@toc@ha +; CHECK-LE-P9-NEXT: mtfprwz f0, r4 +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI4_0@toc@l +; CHECK-LE-P9-NEXT: xxinsertw v2, vs0, 12 +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-LE-P9-NEXT: stxv v2, 0(r3) +; +; CHECK-BE-P8-LABEL: test_none_v2i64: +; CHECK-BE-P8: 
# %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI4_0@toc@ha +; CHECK-BE-P8-NEXT: mtvsrwz v4, r4 +; CHECK-BE-P8-NEXT: addis r4, r2, .LCPI4_1@toc@ha +; CHECK-BE-P8-NEXT: addi r5, r5, .LCPI4_0@toc@l +; CHECK-BE-P8-NEXT: addi r4, r4, .LCPI4_1@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r5 +; CHECK-BE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-BE-P8-NEXT: lxsdx v3, 0, r3 +; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r4 +; CHECK-BE-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-BE-P8-NEXT: stxvw4x v2, 0, r3 +; +; CHECK-BE-P9-LABEL: test_none_v2i64: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsd v3, 0(r3) +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI4_0@toc@ha +; CHECK-BE-P9-NEXT: mtfprwz f0, r4 +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI4_0@toc@l +; CHECK-BE-P9-NEXT: xxinsertw v2, vs0, 0 +; CHECK-BE-P9-NEXT: lxv v4, 0(r3) +; CHECK-BE-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-BE-P9-NEXT: stxv v2, 0(r3) +; +; CHECK-AIX-64-P8-LABEL: test_none_v2i64: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r5, L..C2(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r4 +; CHECK-AIX-64-P8-NEXT: ld r4, L..C3(r2) # %const.1 +; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r5 +; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-64-P8-NEXT: lxsdx v3, 0, r3 +; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r4 +; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3 +; +; CHECK-AIX-64-P9-LABEL: test_none_v2i64: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsd v3, 0(r3) +; CHECK-AIX-64-P9-NEXT: ld r3, L..C2(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r4 +; CHECK-AIX-64-P9-NEXT: xxinsertw v2, vs0, 0 +; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-64-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3) +; +; CHECK-AIX-32-P8-LABEL: test_none_v2i64: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r5, L..C2(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: stw r4, -16(r1) +; 
CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x v5, 0, r3 +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C3(r2) # %const.1 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-AIX-32-P8-NEXT: vperm v2, v5, v2, v4 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3 +; +; CHECK-AIX-32-P9-LABEL: test_none_v2i64: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxsiwzx v3, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C2(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: mtfprwz f0, r4 +; CHECK-AIX-32-P9-NEXT: xxinsertw v2, vs0, 0 +; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P9-NEXT: stxv v2, 0(r3) +entry: + %0 = load <2 x i32>, ptr %ptr, align 4 + %tmp = insertelement <2 x i32> %vec, i32 %v1, i32 0 + %1 = shufflevector <2 x i32> %0, <2 x i32> %tmp, <4 x i32> + store <4 x i32> %1, ptr undef, align 4 + unreachable +} + +define void @test_v2i64_none() { +; CHECK-LE-P8-LABEL: test_v2i64_none: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lfdx f0, 0, r3 +; CHECK-LE-P8-NEXT: xxmrghw vs0, vs0, vs0 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v2i64_none: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lfd f0, 0(r3) +; CHECK-LE-P9-NEXT: xxmrghw vs0, vs0, vs0 +; CHECK-LE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v2i64_none: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lfdx f0, 0, r3 +; CHECK-BE-P8-NEXT: xxmrghw vs0, vs0, vs0 +; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v2i64_none: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lfd f0, 0(r3) +; CHECK-BE-P9-NEXT: xxmrghw vs0, vs0, vs0 +; CHECK-BE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v2i64_none: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; 
CHECK-AIX-64-P8-NEXT: lfdx f0, 0, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs0, vs0 +; CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v2i64_none: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lfd f0, 0(r3) +; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, vs0, vs0 +; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v2i64_none: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxspltw vs0, vs0, 1 +; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs0, vs0 +; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v2i64_none: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxvwsx vs0, 0, r3 +; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs0, vs0 +; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i32>, ptr undef, align 4 + %1 = shufflevector <2 x i32> %0, <2 x i32> undef, <4 x i32> + store <4 x i32> %1, ptr undef, align 4 + ret void +} + +define void @test_v8i16_v8i16(ptr %a) { +; CHECK-LE-P8-LABEL: test_v8i16_v8i16: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lhz r4, 0(r3) +; CHECK-LE-P8-NEXT: lhz r3, 0(r3) +; CHECK-LE-P8-NEXT: mtfprd f0, r4 +; CHECK-LE-P8-NEXT: mtfprd f1, r3 +; CHECK-LE-P8-NEXT: xxmrglw vs0, vs1, vs0 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v8i16_v8i16: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsihzx f0, 0, r3 +; CHECK-LE-P9-NEXT: lxsihzx f1, 0, r3 +; CHECK-LE-P9-NEXT: xxmrglw vs0, vs1, vs0 +; CHECK-LE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v8i16_v8i16: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lhz r4, 0(r3) +; CHECK-BE-P8-NEXT: lhz r3, 0(r3) +; CHECK-BE-P8-NEXT: mtfprwz f0, r4 +; CHECK-BE-P8-NEXT: mtfprwz f1, r3 +; 
CHECK-BE-P8-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v8i16_v8i16: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsihzx f0, 0, r3 +; CHECK-BE-P9-NEXT: lxsihzx f1, 0, r3 +; CHECK-BE-P9-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-BE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v8i16_v8i16: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lhz r4, 0(r3) +; CHECK-AIX-64-P8-NEXT: lhz r3, 0(r3) +; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r4 +; CHECK-AIX-64-P8-NEXT: mtfprwz f1, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v8i16_v8i16: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsihzx f0, 0, r3 +; CHECK-AIX-64-P9-NEXT: lxsihzx f1, 0, r3 +; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v8i16_v8i16: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3) +; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: mtfprwz f0, r4 +; CHECK-AIX-32-P8-NEXT: mtfprwz f1, r3 +; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v8i16_v8i16: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxsihzx f0, 0, r3 +; CHECK-AIX-32-P9-NEXT: lxsihzx f1, 0, r3 +; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i8>, ptr undef, align 1 + %tmp0_1 = bitcast <2 x i8> %0 to i16 + %tmp0_2 = insertelement <8 x i16> undef, i16 %tmp0_1, i32 0 + %tmp0_3 = bitcast <8 x i16> %tmp0_2 to <4 x i32> + %1 = load <2 x i8>, ptr %a, align 1 + %tmp1_1 = bitcast <2 x i8> %1 to i16 + %tmp1_2 = insertelement <8 x i16> undef, i16 
%tmp1_1, i32 0 + %tmp1_3 = bitcast <8 x i16> %tmp1_2 to <4 x i32> + %2 = shufflevector <4 x i32> %tmp0_3, <4 x i32> %tmp1_3, <4 x i32> + store <4 x i32> %2, ptr undef, align 4 + ret void +} + +define void @test_v8i16_v4i32(ptr %a) { +; CHECK-LE-P8-LABEL: test_v8i16_v4i32: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lhz r4, 0(r3) +; CHECK-LE-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd vs0, f0 +; CHECK-LE-P8-NEXT: xxswapd vs1, vs1 +; CHECK-LE-P8-NEXT: xxmrglw vs0, vs0, vs1 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v8i16_v4i32: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r3 +; CHECK-LE-P9-NEXT: xxswapd vs0, f0 +; CHECK-LE-P9-NEXT: vsplth v2, v2, 3 +; CHECK-LE-P9-NEXT: xxmrglw vs0, vs0, v2 +; CHECK-LE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v8i16_v4i32: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-BE-P8-NEXT: lhz r4, 0(r3) +; CHECK-BE-P8-NEXT: sldi r3, r4, 48 +; CHECK-BE-P8-NEXT: xxsldwi vs0, f0, f0, 1 +; CHECK-BE-P8-NEXT: mtfprd f1, r3 +; CHECK-BE-P8-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v8i16_v4i32: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-BE-P9-NEXT: lfiwzx f0, 0, r3 +; CHECK-BE-P9-NEXT: xxsldwi vs0, f0, f0, 1 +; CHECK-BE-P9-NEXT: vsplth v2, v2, 3 +; CHECK-BE-P9-NEXT: xxmrghw vs0, v2, vs0 +; CHECK-BE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v8i16_v4i32: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-AIX-64-P8-NEXT: lhz r4, 0(r3) +; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 48 +; CHECK-AIX-64-P8-NEXT: xxsldwi vs0, f0, f0, 1 +; CHECK-AIX-64-P8-NEXT: mtfprd f1, r3 +; 
CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v8i16_v4i32: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r3 +; CHECK-AIX-64-P9-NEXT: xxsldwi vs0, f0, f0, 1 +; CHECK-AIX-64-P9-NEXT: vsplth v2, v2, 3 +; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, v2, vs0 +; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3) +; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r4 +; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v8i16_v4i32: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lhz r4, 0(r3) +; CHECK-AIX-32-P9-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -32(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -16(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i8>, ptr undef, align 1 + %tmp0_1 = bitcast <2 x i8> %0 to i16 + %tmp0_2 = insertelement <8 x i16> undef, i16 %tmp0_1, i32 0 + %tmp0_3 = bitcast <8 x i16> %tmp0_2 to <4 x i32> + %1 = load <2 x i16>, ptr %a, align 4 + %tmp1_1 = bitcast <2 x i16> %1 to i32 + %tmp1_2 = insertelement <4 x i32> undef, i32 %tmp1_1, i32 0 + %2 = shufflevector <4 x i32> %tmp0_3, <4 x i32> %tmp1_2, <4 x i32> + store <4 x i32> %2, ptr undef, align 4 + ret void +} + +define void @test_v8i16_v2i64(ptr %a) { +; 
CHECK-LE-P8-LABEL: test_v8i16_v2i64: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lhz r4, 0(r3) +; CHECK-LE-P8-NEXT: lfdx f0, 0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd vs0, f0 +; CHECK-LE-P8-NEXT: xxswapd vs1, vs1 +; CHECK-LE-P8-NEXT: xxmrglw vs0, vs0, vs1 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v8i16_v2i64: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-LE-P9-NEXT: lfd f0, 0(r3) +; CHECK-LE-P9-NEXT: xxswapd vs0, f0 +; CHECK-LE-P9-NEXT: vsplth v2, v2, 3 +; CHECK-LE-P9-NEXT: xxmrglw vs0, vs0, v2 +; CHECK-LE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v8i16_v2i64: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lhz r4, 0(r3) +; CHECK-BE-P8-NEXT: lfdx f0, 0, r3 +; CHECK-BE-P8-NEXT: sldi r3, r4, 48 +; CHECK-BE-P8-NEXT: mtfprd f1, r3 +; CHECK-BE-P8-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v8i16_v2i64: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-BE-P9-NEXT: lfd f0, 0(r3) +; CHECK-BE-P9-NEXT: vsplth v2, v2, 3 +; CHECK-BE-P9-NEXT: xxmrghw vs0, v2, vs0 +; CHECK-BE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v8i16_v2i64: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lhz r4, 0(r3) +; CHECK-AIX-64-P8-NEXT: lfdx f0, 0, r3 +; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 48 +; CHECK-AIX-64-P8-NEXT: mtfprd f1, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v8i16_v2i64: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-AIX-64-P9-NEXT: lfd f0, 0(r3) +; CHECK-AIX-64-P9-NEXT: vsplth v2, v2, 3 +; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, v2, vs0 +; CHECK-AIX-64-P9-NEXT: stxv 
vs0, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v8i16_v2i64: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3) +; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r4 +; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v8i16_v2i64: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lhz r4, 0(r3) +; CHECK-AIX-32-P9-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -32(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -16(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i8>, ptr undef, align 1 + %tmp0_1 = bitcast <2 x i8> %0 to i16 + %tmp0_2 = insertelement <8 x i16> undef, i16 %tmp0_1, i32 0 + %tmp0_3 = bitcast <8 x i16> %tmp0_2 to <4 x i32> + %1 = load <2 x i16>, ptr %a, align 8 + %tmp1_1 = bitcast <2 x i16> %1 to i32 + %tmp1_2 = insertelement <4 x i32> undef, i32 %tmp1_1, i32 0 + %2 = shufflevector <4 x i32> %tmp0_3, <4 x i32> %tmp1_2, <4 x i32> + store <4 x i32> %2, ptr undef, align 4 + ret void +} + +define <16 x i8> @test_v4i32_v4i32(ptr %a, ptr %b) { +; CHECK-LE-P8-LABEL: test_v4i32_v4i32: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI9_0@toc@ha +; CHECK-LE-P8-NEXT: lxsiwzx v2, 0, r3 +; CHECK-LE-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI9_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5 +; CHECK-LE-P8-NEXT: xxswapd v4, vs0 +; CHECK-LE-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v4i32_v4i32: +; CHECK-LE-P9: # 
%bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI9_0@toc@ha +; CHECK-LE-P9-NEXT: lxsiwzx v3, 0, r4 +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI9_0@toc@l +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v4i32_v4i32: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI9_0@toc@ha +; CHECK-BE-P8-NEXT: lxsiwzx v2, 0, r3 +; CHECK-BE-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-BE-P8-NEXT: addi r5, r5, .LCPI9_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-BE-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v4i32_v4i32: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI9_0@toc@ha +; CHECK-BE-P9-NEXT: lxsiwzx v3, 0, r4 +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI9_0@toc@l +; CHECK-BE-P9-NEXT: lxv v4, 0(r3) +; CHECK-BE-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v4i32_v4i32: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r5, L..C4(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v4i32_v4i32: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C3(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-64-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v4i32_v4i32: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r5, L..C4(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, 
v3, v4 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v4i32_v4i32: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C3(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %load1 = load <4 x i8>, ptr %a + %load2 = load <4 x i8>, ptr %b + %shuffle1 = shufflevector <4 x i8> %load1, <4 x i8> %load2, <8 x i32> + %shuffle2 = shufflevector <8 x i8> %shuffle1, <8 x i8> %shuffle1, <16 x i32> + ret <16 x i8> %shuffle2 +} + +define void @test_v4i32_v8i16(ptr %a) { +; CHECK-LE-P8-LABEL: test_v4i32_v8i16: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lhz r4, 0(r3) +; CHECK-LE-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd vs0, f0 +; CHECK-LE-P8-NEXT: xxswapd vs1, vs1 +; CHECK-LE-P8-NEXT: xxmrglw vs0, vs1, vs0 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v4i32_v8i16: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r3 +; CHECK-LE-P9-NEXT: xxswapd vs0, f0 +; CHECK-LE-P9-NEXT: vsplth v2, v2, 3 +; CHECK-LE-P9-NEXT: xxmrglw vs0, v2, vs0 +; CHECK-LE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v4i32_v8i16: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-BE-P8-NEXT: lhz r4, 0(r3) +; CHECK-BE-P8-NEXT: sldi r3, r4, 48 +; CHECK-BE-P8-NEXT: xxsldwi vs0, f0, f0, 1 +; CHECK-BE-P8-NEXT: mtfprd f1, r3 +; CHECK-BE-P8-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v4i32_v8i16: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-BE-P9-NEXT: lfiwzx f0, 0, r3 +; CHECK-BE-P9-NEXT: xxsldwi vs0, f0, f0, 1 +; 
CHECK-BE-P9-NEXT: vsplth v2, v2, 3 +; CHECK-BE-P9-NEXT: xxmrghw vs0, vs0, v2 +; CHECK-BE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v4i32_v8i16: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-AIX-64-P8-NEXT: lhz r4, 0(r3) +; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 48 +; CHECK-AIX-64-P8-NEXT: xxsldwi vs0, f0, f0, 1 +; CHECK-AIX-64-P8-NEXT: mtfprd f1, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v4i32_v8i16: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r3 +; CHECK-AIX-64-P9-NEXT: xxsldwi vs0, f0, f0, 1 +; CHECK-AIX-64-P9-NEXT: vsplth v2, v2, 3 +; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, vs0, v2 +; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3) +; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r4 +; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v4i32_v8i16: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lhz r4, 0(r3) +; CHECK-AIX-32-P9-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -32(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -16(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i8>, ptr undef, align 1 + %tmp0_1 = bitcast <2 x i8> %0 to i16 + 
%tmp0_2 = insertelement <8 x i16> undef, i16 %tmp0_1, i32 0 + %tmp0_3 = bitcast <8 x i16> %tmp0_2 to <4 x i32> + %1 = load <2 x i16>, ptr %a, align 4 + %tmp1_1 = bitcast <2 x i16> %1 to i32 + %tmp1_2 = insertelement <4 x i32> undef, i32 %tmp1_1, i32 0 + %2 = shufflevector <4 x i32> %tmp1_2, <4 x i32> %tmp0_3, <4 x i32> + store <4 x i32> %2, ptr undef, align 4 + ret void +} + +define void @test_v4i32_v2i64(ptr %a) { +; CHECK-LE-P8-LABEL: test_v4i32_v2i64: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lfdx f0, 0, r3 +; CHECK-LE-P8-NEXT: lfiwzx f1, 0, r3 +; CHECK-LE-P8-NEXT: xxswapd vs0, f0 +; CHECK-LE-P8-NEXT: xxswapd vs1, f1 +; CHECK-LE-P8-NEXT: xxmrglw vs0, vs0, vs1 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v4i32_v2i64: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lfd f0, 0(r3) +; CHECK-LE-P9-NEXT: lfiwzx f1, 0, r3 +; CHECK-LE-P9-NEXT: xxswapd vs0, f0 +; CHECK-LE-P9-NEXT: xxswapd vs1, f1 +; CHECK-LE-P9-NEXT: xxmrglw vs0, vs0, vs1 +; CHECK-LE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v4i32_v2i64: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lfiwzx f1, 0, r3 +; CHECK-BE-P8-NEXT: lfdx f0, 0, r3 +; CHECK-BE-P8-NEXT: xxsldwi vs1, f1, f1, 1 +; CHECK-BE-P8-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v4i32_v2i64: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lfiwzx f1, 0, r3 +; CHECK-BE-P9-NEXT: lfd f0, 0(r3) +; CHECK-BE-P9-NEXT: xxsldwi vs1, f1, f1, 1 +; CHECK-BE-P9-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-BE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v4i32_v2i64: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lfiwzx f1, 0, r3 +; CHECK-AIX-64-P8-NEXT: lfdx f0, 0, r3 +; CHECK-AIX-64-P8-NEXT: xxsldwi vs1, f1, f1, 1 +; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs1, vs0 +; 
CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v4i32_v2i64: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lfiwzx f1, 0, r3 +; CHECK-AIX-64-P9-NEXT: lfd f0, 0(r3) +; CHECK-AIX-64-P9-NEXT: xxsldwi vs1, f1, f1, 1 +; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v4i32_v2i64: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r4, 0(r3) +; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: stw r4, -16(r1) +; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r4 +; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v4i32_v2i64: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lwz r4, 0(r3) +; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P9-NEXT: stw r4, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i16>, ptr undef, align 8 + %tmp0_1 = bitcast <2 x i16> %0 to i32 + %tmp0_2 = insertelement <4 x i32> undef, i32 %tmp0_1, i32 0 + %1 = load <2 x i16>, ptr %a, align 4 + %tmp1_1 = bitcast <2 x i16> %1 to i32 + %tmp1_2 = insertelement <4 x i32> undef, i32 %tmp1_1, i32 0 + %2 = shufflevector <4 x i32> %tmp1_2, <4 x i32> %tmp0_2, <4 x i32> + store <4 x i32> %2, ptr undef, align 4 + ret void +} + +define void @test_v2i64_v2i64(ptr %a) { +; CHECK-LE-P8-LABEL: test_v2i64_v2i64: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lfdx f0, 0, r3 +; CHECK-LE-P8-NEXT: lfdx f1, 0, r3 +; 
CHECK-LE-P8-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v2i64_v2i64: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lfd f0, 0(r3) +; CHECK-LE-P9-NEXT: lfd f1, 0(r3) +; CHECK-LE-P9-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-LE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v2i64_v2i64: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lfdx f0, 0, r3 +; CHECK-BE-P8-NEXT: lfdx f1, 0, r3 +; CHECK-BE-P8-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v2i64_v2i64: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lfd f0, 0(r3) +; CHECK-BE-P9-NEXT: lfd f1, 0(r3) +; CHECK-BE-P9-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-BE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v2i64_v2i64: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lfdx f0, 0, r3 +; CHECK-AIX-64-P8-NEXT: lfdx f1, 0, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v2i64_v2i64: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lfd f0, 0(r3) +; CHECK-AIX-64-P9-NEXT: lfd f1, 0(r3) +; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v2i64_v2i64: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r5, 4(r3) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -16 +; CHECK-AIX-32-P8-NEXT: stw r5, -16(r1) +; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r4 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: lfiwzx f2, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: 
xxspltw vs1, vs2, 1 +; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v2i64_v2i64: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lwz r4, 4(r3) +; CHECK-AIX-32-P9-NEXT: stw r4, -16(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: lxvwsx vs1, 0, r3 +; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i32>, ptr undef, align 4 + %1 = load <2 x i32>, ptr %a, align 4 + %2 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> + store <4 x i32> %2, ptr undef, align 4 + ret void +} + +define void @test_v2i64_v4i32(ptr %a) { +; CHECK-LE-P8-LABEL: test_v2i64_v4i32: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lfdx f0, 0, r3 +; CHECK-LE-P8-NEXT: lfiwzx f1, 0, r3 +; CHECK-LE-P8-NEXT: xxswapd vs0, f0 +; CHECK-LE-P8-NEXT: xxswapd vs1, f1 +; CHECK-LE-P8-NEXT: xxmrglw vs0, vs1, vs0 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v2i64_v4i32: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lfd f0, 0(r3) +; CHECK-LE-P9-NEXT: lfiwzx f1, 0, r3 +; CHECK-LE-P9-NEXT: xxswapd vs0, f0 +; CHECK-LE-P9-NEXT: xxswapd vs1, f1 +; CHECK-LE-P9-NEXT: xxmrglw vs0, vs1, vs0 +; CHECK-LE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v2i64_v4i32: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lfiwzx f1, 0, r3 +; CHECK-BE-P8-NEXT: lfdx f0, 0, r3 +; CHECK-BE-P8-NEXT: xxsldwi vs1, f1, f1, 1 +; CHECK-BE-P8-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v2i64_v4i32: +; CHECK-BE-P9: # %bb.0: # %entry 
+; CHECK-BE-P9-NEXT: lfiwzx f1, 0, r3 +; CHECK-BE-P9-NEXT: lfd f0, 0(r3) +; CHECK-BE-P9-NEXT: xxsldwi vs1, f1, f1, 1 +; CHECK-BE-P9-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-BE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v2i64_v4i32: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lfiwzx f1, 0, r3 +; CHECK-AIX-64-P8-NEXT: lfdx f0, 0, r3 +; CHECK-AIX-64-P8-NEXT: xxsldwi vs1, f1, f1, 1 +; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v2i64_v4i32: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lfiwzx f1, 0, r3 +; CHECK-AIX-64-P9-NEXT: lfd f0, 0(r3) +; CHECK-AIX-64-P9-NEXT: xxsldwi vs1, f1, f1, 1 +; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v2i64_v4i32: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r4, 0(r3) +; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r4 +; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v2i64_v4i32: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lwz r4, 0(r3) +; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i16>, ptr undef, align 8 + %tmp0_1 = bitcast <2 x i16> %0 to i32 + %tmp0_2 = insertelement <4 x i32> 
undef, i32 %tmp0_1, i32 0 + %1 = load <2 x i16>, ptr %a, align 4 + %tmp1_1 = bitcast <2 x i16> %1 to i32 + %tmp1_2 = insertelement <4 x i32> undef, i32 %tmp1_1, i32 0 + %2 = shufflevector <4 x i32> %tmp0_2, <4 x i32> %tmp1_2, <4 x i32> + store <4 x i32> %2, ptr undef, align 4 + ret void +} + +define void @test_v2i64_v8i16(ptr %a) { +; CHECK-LE-P8-LABEL: test_v2i64_v8i16: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lhz r4, 0(r3) +; CHECK-LE-P8-NEXT: lfdx f0, 0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd vs0, f0 +; CHECK-LE-P8-NEXT: xxswapd vs1, vs1 +; CHECK-LE-P8-NEXT: xxmrglw vs0, vs1, vs0 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v2i64_v8i16: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-LE-P9-NEXT: lfd f0, 0(r3) +; CHECK-LE-P9-NEXT: xxswapd vs0, f0 +; CHECK-LE-P9-NEXT: vsplth v2, v2, 3 +; CHECK-LE-P9-NEXT: xxmrglw vs0, v2, vs0 +; CHECK-LE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v2i64_v8i16: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lhz r4, 0(r3) +; CHECK-BE-P8-NEXT: lfdx f0, 0, r3 +; CHECK-BE-P8-NEXT: sldi r3, r4, 48 +; CHECK-BE-P8-NEXT: mtfprd f1, r3 +; CHECK-BE-P8-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v2i64_v8i16: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-BE-P9-NEXT: lfd f0, 0(r3) +; CHECK-BE-P9-NEXT: vsplth v2, v2, 3 +; CHECK-BE-P9-NEXT: xxmrghw vs0, vs0, v2 +; CHECK-BE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v2i64_v8i16: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lhz r4, 0(r3) +; CHECK-AIX-64-P8-NEXT: lfdx f0, 0, r3 +; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 48 +; CHECK-AIX-64-P8-NEXT: mtfprd f1, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs0, vs1 +; 
CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v2i64_v8i16: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-AIX-64-P9-NEXT: lfd f0, 0(r3) +; CHECK-AIX-64-P9-NEXT: vsplth v2, v2, 3 +; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, vs0, v2 +; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v2i64_v8i16: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3) +; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r4 +; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v2i64_v8i16: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lhz r4, 0(r3) +; CHECK-AIX-32-P9-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -32(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -16(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i8>, ptr undef, align 1 + %tmp0_1 = bitcast <2 x i8> %0 to i16 + %tmp0_2 = insertelement <8 x i16> undef, i16 %tmp0_1, i32 0 + %tmp0_3 = bitcast <8 x i16> %tmp0_2 to <4 x i32> + %1 = load <2 x i16>, ptr %a, align 8 + %tmp1_1 = bitcast <2 x i16> %1 to i32 + %tmp1_2 = insertelement <4 x i32> undef, i32 %tmp1_1, i32 0 + %2 = shufflevector <4 x i32> %tmp1_2, <4 x i32> %tmp0_3, <4 x i32> + store <4 x i32> %2, ptr undef, align 4 + ret void +} diff --git a/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll new file mode 100644 
--- /dev/null +++ b/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll @@ -0,0 +1,1554 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-LE-P8 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-LE-P9 +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-BE-P8 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-BE-P9 + +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P8 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P9 +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P8 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P9 + +define void @test_none_v8i16(ptr %a0, ptr %a1, <16 x i8> %a, <8 x i16> %b, i8 %arg) { +; CHECK-LE-P8-LABEL: test_none_v8i16: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r4, r2, .LCPI0_0@toc@ha +; CHECK-LE-P8-NEXT: lhz r3, 0(r3) +; CHECK-LE-P8-NEXT: addi r4, r4, .LCPI0_0@toc@l +; 
CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: mtvsrd v4, r3 +; CHECK-LE-P8-NEXT: xxswapd v3, vs0 +; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P8-NEXT: xxswapd vs0, v2 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_none_v8i16: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsihzx v3, 0, r3 +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-LE-P9-NEXT: stxv v2, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_none_v8i16: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis r4, r2, .LCPI0_0@toc@ha +; CHECK-BE-P8-NEXT: lhz r3, 0(r3) +; CHECK-BE-P8-NEXT: addi r4, r4, .LCPI0_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-BE-P8-NEXT: mtvsrwz v4, r3 +; CHECK-BE-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-BE-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_none_v8i16: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsihzx v3, 0, r3 +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l +; CHECK-BE-P9-NEXT: lxv v4, 0(r3) +; CHECK-BE-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-BE-P9-NEXT: stxv v2, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_none_v8i16: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r4, L..C0(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: lhz r3, 0(r3) +; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3 +; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_none_v8i16: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsihzx v3, 0, r3 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C0(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-64-P9-NEXT: vperm v2, v2, v3, v4 +; 
CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_none_v8i16: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r4, L..C0(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: mtvsrwz v4, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_none_v8i16: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxsihzx v3, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C0(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P9-NEXT: stxv v2, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %load0.tmp = load <2 x i8>, ptr %a0 + %load0.tmp1 = bitcast <2 x i8> %load0.tmp to i16 + %load0 = insertelement <8 x i16> %b, i16 %load0.tmp1, i64 0 + %load1.tmp = insertelement <16 x i8> %a, i8 %arg, i32 0 + %load1 = bitcast <16 x i8> %load1.tmp to <8 x i16> + %shuff = shufflevector <8 x i16> %load0, <8 x i16> %load1, <8 x i32> + store <8 x i16> %shuff, ptr undef + ret void +} + +define void @test_v8i16_none(ptr %a0, ptr %a1, <16 x i8> %a, <8 x i16> %b, i8 %arg) { +; CHECK-LE-P8-LABEL: test_v8i16_none: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r4, r2, .LCPI1_0@toc@ha +; CHECK-LE-P8-NEXT: mtvsrd v4, r9 +; CHECK-LE-P8-NEXT: lhz r3, 0(r3) +; CHECK-LE-P8-NEXT: addi r4, r4, .LCPI1_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: addis r4, r2, .LCPI1_1@toc@ha +; CHECK-LE-P8-NEXT: addi r4, r4, .LCPI1_1@toc@l +; CHECK-LE-P8-NEXT: xxswapd v3, vs0 +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-LE-P8-NEXT: xxswapd v3, vs0 +; CHECK-LE-P8-NEXT: mtvsrd v4, r3 +; CHECK-LE-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-LE-P8-NEXT: xxswapd vs0, v2 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: 
test_v8i16_none: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsihzx v3, 0, r3 +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; CHECK-LE-P9-NEXT: mtvsrwz v4, r9 +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI1_0@toc@l +; CHECK-LE-P9-NEXT: vinsertb v2, v4, 15 +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-LE-P9-NEXT: stxv v2, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v8i16_none: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis r4, r2, .LCPI1_0@toc@ha +; CHECK-BE-P8-NEXT: mtvsrwz v4, r9 +; CHECK-BE-P8-NEXT: lhz r3, 0(r3) +; CHECK-BE-P8-NEXT: addi r4, r4, .LCPI1_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-BE-P8-NEXT: addis r4, r2, .LCPI1_1@toc@ha +; CHECK-BE-P8-NEXT: addi r4, r4, .LCPI1_1@toc@l +; CHECK-BE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-BE-P8-NEXT: mtvsrwz v4, r3 +; CHECK-BE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-BE-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v8i16_none: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsihzx v3, 0, r3 +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; CHECK-BE-P9-NEXT: mtvsrwz v4, r9 +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI1_0@toc@l +; CHECK-BE-P9-NEXT: vinsertb v2, v4, 0 +; CHECK-BE-P9-NEXT: lxv v4, 0(r3) +; CHECK-BE-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-BE-P9-NEXT: stxv v2, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v8i16_none: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r4, L..C1(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r5 +; CHECK-AIX-64-P8-NEXT: lhz r3, 0(r3) +; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: ld r4, L..C2(r2) # %const.1 +; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3 +; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr 
+; +; CHECK-AIX-64-P9-LABEL: test_v8i16_none: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsihzx v3, 0, r3 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C1(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: mtvsrwz v4, r5 +; CHECK-AIX-64-P9-NEXT: vinsertb v2, v4, 0 +; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-64-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v8i16_none: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r4, L..C1(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: mtvsrwz v4, r5 +; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: lwz r4, L..C2(r2) # %const.1 +; CHECK-AIX-32-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: mtvsrwz v4, r3 +; CHECK-AIX-32-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v8i16_none: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxsihzx v3, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C1(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: mtvsrwz v4, r5 +; CHECK-AIX-32-P9-NEXT: vinsertb v2, v4, 0 +; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-32-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-32-P9-NEXT: stxv v2, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %load0.tmp = load <2 x i8>, ptr %a0 + %load0.tmp1 = bitcast <2 x i8> %load0.tmp to i16 + %load0 = insertelement <8 x i16> %b, i16 %load0.tmp1, i64 0 + %load1.tmp = insertelement <16 x i8> %a, i8 %arg, i32 0 + %load1 = bitcast <16 x i8> %load1.tmp to <8 x i16> + %shuff = shufflevector <8 x i16> %load0, <8 x i16> %load1, <8 x i32> + store <8 x i16> %shuff, ptr undef + ret void +} + +define void @test_none_v4i32(ptr %ptr, ptr %ptr2, i8 %v3) local_unnamed_addr #0 { +; CHECK-LE-P8-LABEL: test_none_v4i32: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r4, r2, .LCPI2_0@toc@ha +; 
CHECK-LE-P8-NEXT: mtvsrd v3, r5 +; CHECK-LE-P8-NEXT: addi r4, r4, .LCPI2_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: addis r4, r2, .LCPI2_1@toc@ha +; CHECK-LE-P8-NEXT: addi r4, r4, .LCPI2_1@toc@l +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: vperm v2, v3, v3, v2 +; CHECK-LE-P8-NEXT: lxsiwzx v3, 0, r3 +; CHECK-LE-P8-NEXT: xxswapd v4, vs0 +; CHECK-LE-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-LE-P8-NEXT: xxswapd vs0, v2 +; CHECK-LE-P8-NEXT: stfdx f0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_none_v4i32: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; CHECK-LE-P9-NEXT: mtvsrd v3, r5 +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI2_0@toc@l +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI2_1@toc@ha +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI2_1@toc@l +; CHECK-LE-P9-NEXT: vperm v3, v3, v3, v4 +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-LE-P9-NEXT: xxswapd vs0, v2 +; CHECK-LE-P9-NEXT: stfd f0, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_none_v4i32: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis r4, r2, .LCPI2_0@toc@ha +; CHECK-BE-P8-NEXT: mtvsrwz v3, r5 +; CHECK-BE-P8-NEXT: addi r4, r4, .LCPI2_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v2, 0, r4 +; CHECK-BE-P8-NEXT: addis r4, r2, .LCPI2_1@toc@ha +; CHECK-BE-P8-NEXT: addi r4, r4, .LCPI2_1@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r4 +; CHECK-BE-P8-NEXT: vperm v2, v3, v3, v2 +; CHECK-BE-P8-NEXT: lxsiwzx v3, 0, r3 +; CHECK-BE-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-BE-P8-NEXT: stxsdx v2, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_none_v4i32: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; CHECK-BE-P9-NEXT: mtvsrwz v3, r5 +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI2_0@toc@l +; 
CHECK-BE-P9-NEXT: lxv v4, 0(r3) +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI2_1@toc@ha +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI2_1@toc@l +; CHECK-BE-P9-NEXT: vperm v3, v3, v3, v4 +; CHECK-BE-P9-NEXT: lxv v4, 0(r3) +; CHECK-BE-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-BE-P9-NEXT: stxsd v2, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_none_v4i32: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r4, L..C3(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v3, r5 +; CHECK-AIX-64-P8-NEXT: lxvw4x v2, 0, r4 +; CHECK-AIX-64-P8-NEXT: ld r4, L..C4(r2) # %const.1 +; CHECK-AIX-64-P8-NEXT: vperm v2, v3, v3, v2 +; CHECK-AIX-64-P8-NEXT: lxsiwzx v3, 0, r3 +; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r4 +; CHECK-AIX-64-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-64-P8-NEXT: stxsdx v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_none_v4i32: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C2(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: mtvsrwz v3, r5 +; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-64-P9-NEXT: ld r3, L..C3(r2) # %const.1 +; CHECK-AIX-64-P9-NEXT: vperm v3, v3, v3, v4 +; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-64-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-64-P9-NEXT: stxsd v2, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_none_v4i32: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: stb r5, -32(r1) +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C3(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: vmrghh v3, v3, v3 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: lwz r3, -12(r1) +; CHECK-AIX-32-P8-NEXT: stw r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: lwz r3, -16(r1) +; 
CHECK-AIX-32-P8-NEXT: stw r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_none_v4i32: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C2(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: stb r5, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-32-P9-NEXT: vmrghh v3, v3, v3 +; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P9-NEXT: stxv v2, -16(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, -12(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, 0(r3) +; CHECK-AIX-32-P9-NEXT: lwz r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i16>, ptr %ptr, align 4 + %tmp = insertelement <4 x i8> undef, i8 %v3, i32 0 + %tmp0 = bitcast <4 x i8> %tmp to <2 x i16> + %1 = shufflevector <2 x i16> %0, <2 x i16> %tmp0, <4 x i32> + store <4 x i16> %1, ptr undef, align 4 + ret void +} + +define void @test_v4i32_none(ptr nocapture readonly %ptr1, ptr nocapture readonly %ptr2) { +; CHECK-LE-P8-LABEL: test_v4i32_none: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r4, r2, .LCPI3_0@toc@ha +; CHECK-LE-P8-NEXT: lxsiwzx v2, 0, r3 +; CHECK-LE-P8-NEXT: xxlxor v4, v4, v4 +; CHECK-LE-P8-NEXT: addi r4, r4, .LCPI3_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: xxswapd v3, vs0 +; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P8-NEXT: xxswapd vs0, v2 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v4i32_none: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI3_0@toc@ha +; CHECK-LE-P9-NEXT: xxlxor v4, v4, v4 +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI3_0@toc@l +; CHECK-LE-P9-NEXT: lxv v3, 0(r3) +; CHECK-LE-P9-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P9-NEXT: stxv v2, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v4i32_none: +; CHECK-BE-P8: # %bb.0: # %entry +; 
CHECK-BE-P8-NEXT: addis r4, r2, .LCPI3_0@toc@ha +; CHECK-BE-P8-NEXT: lxsiwzx v2, 0, r3 +; CHECK-BE-P8-NEXT: xxlxor v4, v4, v4 +; CHECK-BE-P8-NEXT: addi r4, r4, .LCPI3_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-BE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-BE-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v4i32_none: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI3_0@toc@ha +; CHECK-BE-P9-NEXT: xxlxor v4, v4, v4 +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI3_0@toc@l +; CHECK-BE-P9-NEXT: lxv v3, 0(r3) +; CHECK-BE-P9-NEXT: vperm v2, v4, v2, v3 +; CHECK-BE-P9-NEXT: stxv v2, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v4i32_none: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r4, L..C5(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: xxlxor v4, v4, v4 +; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v4i32_none: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C4(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: xxlxor v4, v4, v4 +; CHECK-AIX-64-P9-NEXT: lxv v3, 0(r3) +; CHECK-AIX-64-P9-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v4i32_none: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r4, L..C4(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxlxor v4, v4, v4 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v4i32_none: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3 +; 
CHECK-AIX-32-P9-NEXT: lwz r3, L..C3(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: xxlxor v4, v4, v4 +; CHECK-AIX-32-P9-NEXT: lxv v3, 0(r3) +; CHECK-AIX-32-P9-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-32-P9-NEXT: stxv v2, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i16>, ptr %ptr1, align 1 + %1 = load <2 x i16>, ptr %ptr2, align 1 + %shuffle1 = shufflevector <2 x i16> %0, <2 x i16> %1, <4 x i32> + %2 = zext <4 x i16> %shuffle1 to <4 x i32> + store <4 x i32> %2, ptr undef, align 16 + ret void +} + +define void @test_none_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readonly %ptr2) { +; CHECK-LE-P8-LABEL: test_none_v2i64: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI4_0@toc@ha +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: lxsdx v2, 0, r3 +; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI4_1@toc@ha +; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI4_0@toc@l +; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI4_1@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs1, 0, r5 +; CHECK-LE-P8-NEXT: xxswapd v3, vs0 +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: xxswapd v4, vs1 +; CHECK-LE-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-LE-P8-NEXT: xxswapd v3, vs0 +; CHECK-LE-P8-NEXT: xxlxor v4, v4, v4 +; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P8-NEXT: xxswapd vs0, v2 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_none_v2i64: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsd v2, 0(r3) +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI4_0@toc@ha +; CHECK-LE-P9-NEXT: lxv v3, 0(r4) +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI4_0@toc@l +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI4_1@toc@ha +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI4_1@toc@l +; CHECK-LE-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-LE-P9-NEXT: lxv v3, 0(r3) +; CHECK-LE-P9-NEXT: xxlxor v4, v4, v4 +; CHECK-LE-P9-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P9-NEXT: stxv v2, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: 
test_none_v2i64: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI4_0@toc@ha +; CHECK-BE-P8-NEXT: lxsdx v2, 0, r3 +; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-BE-P8-NEXT: addi r5, r5, .LCPI4_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-BE-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-BE-P8-NEXT: xxlxor v3, v3, v3 +; CHECK-BE-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_none_v2i64: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsd v2, 0(r3) +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI4_0@toc@ha +; CHECK-BE-P9-NEXT: lxv v3, 0(r4) +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI4_0@toc@l +; CHECK-BE-P9-NEXT: lxv v4, 0(r3) +; CHECK-BE-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-BE-P9-NEXT: xxlxor v3, v3, v3 +; CHECK-BE-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P9-NEXT: stxv v2, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_none_v2i64: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r5, L..C6(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: lxsdx v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-AIX-64-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-64-P8-NEXT: xxlxor v3, v3, v3 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_none_v2i64: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsd v2, 0(r3) +; CHECK-AIX-64-P9-NEXT: ld r3, L..C5(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: lxv v3, 0(r4) +; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-64-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-64-P9-NEXT: xxlxor v3, v3, v3 +; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_none_v2i64: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r5, L..C5(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: 
lxsiwzx v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-32-P8-NEXT: xxlxor v3, v3, v3 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_none_v2i64: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C4(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxv v3, 0(r4) +; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-32-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-32-P9-NEXT: xxlxor v3, v3, v3 +; CHECK-AIX-32-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P9-NEXT: stxv v2, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <4 x i16>, ptr %ptr1, align 1 + %1 = load <4 x i32>, ptr %ptr2, align 1 + %bc = trunc <4 x i32> %1 to <4 x i16> + %shuffle1 = shufflevector <4 x i16> %0, <4 x i16> %bc, <4 x i32> + %2 = zext <4 x i16> %shuffle1 to <4 x i32> + store <4 x i32> %2, ptr undef, align 16 + ret void +} + +define void @test_v2i64_none(ptr nocapture readonly %ptr1) { +; CHECK-LE-P8-LABEL: test_v2i64_none: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r4, r2, .LCPI5_0@toc@ha +; CHECK-LE-P8-NEXT: lxsdx v2, 0, r3 +; CHECK-LE-P8-NEXT: xxlxor v4, v4, v4 +; CHECK-LE-P8-NEXT: addi r4, r4, .LCPI5_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: xxswapd v3, vs0 +; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P8-NEXT: xxswapd vs0, v2 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v2i64_none: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsd v2, 0(r3) +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI5_0@toc@ha +; CHECK-LE-P9-NEXT: xxlxor v4, v4, v4 +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI5_0@toc@l +; CHECK-LE-P9-NEXT: lxv v3, 0(r3) +; CHECK-LE-P9-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P9-NEXT: stxv v2, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; 
CHECK-BE-P8-LABEL: test_v2i64_none: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis r4, r2, .LCPI5_0@toc@ha +; CHECK-BE-P8-NEXT: lxsdx v2, 0, r3 +; CHECK-BE-P8-NEXT: xxlxor v4, v4, v4 +; CHECK-BE-P8-NEXT: addi r4, r4, .LCPI5_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-BE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-BE-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v2i64_none: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsd v2, 0(r3) +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI5_0@toc@ha +; CHECK-BE-P9-NEXT: xxlxor v4, v4, v4 +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI5_0@toc@l +; CHECK-BE-P9-NEXT: lxv v3, 0(r3) +; CHECK-BE-P9-NEXT: vperm v2, v4, v2, v3 +; CHECK-BE-P9-NEXT: stxv v2, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v2i64_none: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r4, L..C7(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: lxsdx v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: xxlxor v4, v4, v4 +; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v2i64_none: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsd v2, 0(r3) +; CHECK-AIX-64-P9-NEXT: ld r3, L..C6(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: xxlxor v4, v4, v4 +; CHECK-AIX-64-P9-NEXT: lxv v3, 0(r3) +; CHECK-AIX-64-P9-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v2i64_none: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r4, 4(r3) +; CHECK-AIX-32-P8-NEXT: xxlxor v4, v4, v4 +; CHECK-AIX-32-P8-NEXT: stw r4, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C6(r2) # 
%const.0 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r4 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxmrghw v2, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v2i64_none: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lwz r4, 4(r3) +; CHECK-AIX-32-P9-NEXT: xxlxor v4, v4, v4 +; CHECK-AIX-32-P9-NEXT: stw r4, -16(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C5(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, 0(r3) +; CHECK-AIX-32-P9-NEXT: xxmrghw v2, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-32-P9-NEXT: stxv v2, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <4 x i16>, ptr %ptr1, align 1 + %shuffle1 = shufflevector <4 x i16> %0, <4 x i16> undef, <4 x i32> + %1 = zext <4 x i16> %shuffle1 to <4 x i32> + store <4 x i32> %1, ptr undef, align 16 + ret void +} + +define <16 x i8> @test_v8i16_v8i16(ptr %a, ptr %b) { +; CHECK-LE-P8-LABEL: test_v8i16_v8i16: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI6_0@toc@ha +; CHECK-LE-P8-NEXT: lhz r3, 0(r3) +; CHECK-LE-P8-NEXT: lhz r4, 0(r4) +; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI6_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5 +; CHECK-LE-P8-NEXT: mtvsrd v2, r3 +; CHECK-LE-P8-NEXT: mtvsrd v4, r4 +; CHECK-LE-P8-NEXT: xxswapd v3, vs0 +; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v8i16_v8i16: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI6_0@toc@ha +; CHECK-LE-P9-NEXT: lxsihzx v3, 0, r4 +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI6_0@toc@l +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: 
test_v8i16_v8i16: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI6_0@toc@ha +; CHECK-BE-P8-NEXT: lhz r3, 0(r3) +; CHECK-BE-P8-NEXT: lhz r4, 0(r4) +; CHECK-BE-P8-NEXT: addi r5, r5, .LCPI6_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v2, 0, r5 +; CHECK-BE-P8-NEXT: mtvsrwz v3, r3 +; CHECK-BE-P8-NEXT: mtvsrwz v4, r4 +; CHECK-BE-P8-NEXT: vperm v2, v3, v4, v2 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v8i16_v8i16: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI6_0@toc@ha +; CHECK-BE-P9-NEXT: lxsihzx v3, 0, r4 +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI6_0@toc@l +; CHECK-BE-P9-NEXT: lxv v4, 0(r3) +; CHECK-BE-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v8i16_v8i16: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r5, L..C8(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: lhz r3, 0(r3) +; CHECK-AIX-64-P8-NEXT: lhz r4, 0(r4) +; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r3 +; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r5 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r4 +; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v8i16_v8i16: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C7(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: lxsihzx v3, 0, r4 +; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-64-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v8i16_v8i16: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r5, L..C7(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r4) +; CHECK-AIX-32-P8-NEXT: mtvsrwz v2, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r5 +; CHECK-AIX-32-P8-NEXT: mtvsrwz v4, r4 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v8i16_v8i16: +; CHECK-AIX-32-P9: # %bb.0: # 
%entry +; CHECK-AIX-32-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C6(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxsihzx v3, 0, r4 +; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %load1 = load <2 x i8>, ptr %a + %load2 = load <2 x i8>, ptr %b + %shuffle1 = shufflevector <2 x i8> %load1, <2 x i8> %load2, <8 x i32> + %shuffle2 = shufflevector <8 x i8> %shuffle1, <8 x i8> %shuffle1, <16 x i32> + ret <16 x i8> %shuffle2 +} + +define <16 x i8> @test_v8i16_v4i32(ptr %a, ptr %b) local_unnamed_addr { +; CHECK-LE-P8-LABEL: test_v8i16_v4i32: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lhz r3, 0(r3) +; CHECK-LE-P8-NEXT: lfiwzx f0, 0, r4 +; CHECK-LE-P8-NEXT: mtfprd f1, r3 +; CHECK-LE-P8-NEXT: xxswapd v3, f0 +; CHECK-LE-P8-NEXT: xxswapd v2, vs1 +; CHECK-LE-P8-NEXT: vmrglh v2, v3, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v8i16_v4i32: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r4 +; CHECK-LE-P9-NEXT: xxswapd v3, f0 +; CHECK-LE-P9-NEXT: vsplth v2, v2, 3 +; CHECK-LE-P9-NEXT: vmrglh v2, v3, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v8i16_v4i32: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lfiwzx f0, 0, r4 +; CHECK-BE-P8-NEXT: lhz r3, 0(r3) +; CHECK-BE-P8-NEXT: sldi r3, r3, 48 +; CHECK-BE-P8-NEXT: xxsldwi v2, f0, f0, 1 +; CHECK-BE-P8-NEXT: mtvsrd v3, r3 +; CHECK-BE-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v8i16_v4i32: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-BE-P9-NEXT: lfiwzx f0, 0, r4 +; CHECK-BE-P9-NEXT: xxsldwi v3, f0, f0, 1 +; CHECK-BE-P9-NEXT: vsplth v2, v2, 3 +; CHECK-BE-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v8i16_v4i32: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lfiwzx f0, 0, r4 +; CHECK-AIX-64-P8-NEXT: lhz r3, 0(r3) +; 
CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48 +; CHECK-AIX-64-P8-NEXT: xxsldwi v2, f0, f0, 1 +; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v8i16_v4i32: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r4 +; CHECK-AIX-64-P9-NEXT: xxsldwi v3, f0, f0, 1 +; CHECK-AIX-64-P9-NEXT: vsplth v2, v2, 3 +; CHECK-AIX-64-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: sth r3, -32(r1) +; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r4) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r4 +; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v8i16_v4i32: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lhz r3, 0(r3) +; CHECK-AIX-32-P9-NEXT: sth r3, -32(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r4) +; CHECK-AIX-32-P9-NEXT: lxv v2, -32(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1) +; CHECK-AIX-32-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i8>, ptr %a + %bc1 = bitcast <2 x i8> %0 to i16 + %vecinit3 = insertelement <8 x i16> poison, i16 %bc1, i64 0 + %1 = load <2 x i8>, ptr %b, align 4 + %bc2 = bitcast <2 x i8> %1 to i16 + %vecinit6 = insertelement <8 x i16> undef, i16 %bc2, i64 0 + %2 = bitcast <8 x i16> %vecinit3 to <16 x i8> + %3 = bitcast <8 x i16> %vecinit6 to <16 x i8> + %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v8i16_v2i64(ptr %a, ptr %b) local_unnamed_addr { +; CHECK-LE-P8-LABEL: test_v8i16_v2i64: +; 
CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lhz r3, 0(r3) +; CHECK-LE-P8-NEXT: lfdx f0, 0, r4 +; CHECK-LE-P8-NEXT: mtfprd f1, r3 +; CHECK-LE-P8-NEXT: xxswapd v3, f0 +; CHECK-LE-P8-NEXT: xxswapd v2, vs1 +; CHECK-LE-P8-NEXT: vmrglh v2, v3, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v8i16_v2i64: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-LE-P9-NEXT: lfd f0, 0(r4) +; CHECK-LE-P9-NEXT: xxswapd v3, f0 +; CHECK-LE-P9-NEXT: vsplth v2, v2, 3 +; CHECK-LE-P9-NEXT: vmrglh v2, v3, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v8i16_v2i64: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lhz r3, 0(r3) +; CHECK-BE-P8-NEXT: lxsdx v2, 0, r4 +; CHECK-BE-P8-NEXT: sldi r3, r3, 48 +; CHECK-BE-P8-NEXT: mtvsrd v3, r3 +; CHECK-BE-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v8i16_v2i64: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-BE-P9-NEXT: lxsd v3, 0(r4) +; CHECK-BE-P9-NEXT: vsplth v2, v2, 3 +; CHECK-BE-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v8i16_v2i64: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lhz r3, 0(r3) +; CHECK-AIX-64-P8-NEXT: lxsdx v2, 0, r4 +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48 +; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v8i16_v2i64: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-AIX-64-P9-NEXT: lxsd v3, 0(r4) +; CHECK-AIX-64-P9-NEXT: vsplth v2, v2, 3 +; CHECK-AIX-64-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v8i16_v2i64: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: sth r3, -32(r1) +; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r4) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r4 +; 
CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v8i16_v2i64: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lhz r3, 0(r3) +; CHECK-AIX-32-P9-NEXT: sth r3, -32(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r4) +; CHECK-AIX-32-P9-NEXT: lxv v2, -32(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1) +; CHECK-AIX-32-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i8>, ptr %a + %bc1 = bitcast <2 x i8> %0 to i16 + %vecinit3 = insertelement <8 x i16> poison, i16 %bc1, i64 0 + %1 = load <2 x i8>, ptr %b, align 8 + %bc2 = bitcast <2 x i8> %1 to i16 + %vecinit6 = insertelement <8 x i16> undef, i16 %bc2, i64 0 + %2 = bitcast <8 x i16> %vecinit3 to <16 x i8> + %3 = bitcast <8 x i16> %vecinit6 to <16 x i8> + %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> + ret <16 x i8> %shuffle +} + +define void @test_v4i32_v4i32(ptr nocapture readonly %ptr1, ptr nocapture readonly %ptr2) { +; CHECK-LE-P8-LABEL: test_v4i32_v4i32: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI9_0@toc@ha +; CHECK-LE-P8-NEXT: lxsiwzx v2, 0, r3 +; CHECK-LE-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI9_1@toc@ha +; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI9_0@toc@l +; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI9_1@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5 +; CHECK-LE-P8-NEXT: xxswapd v4, vs0 +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-LE-P8-NEXT: xxswapd v3, vs0 +; CHECK-LE-P8-NEXT: xxlxor v4, v4, v4 +; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P8-NEXT: xxswapd vs0, v2 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v4i32_v4i32: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsiwzx v2, 0, r3 +; 
CHECK-LE-P9-NEXT: addis r3, r2, .LCPI9_0@toc@ha +; CHECK-LE-P9-NEXT: lxsiwzx v3, 0, r4 +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI9_0@toc@l +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI9_1@toc@ha +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI9_1@toc@l +; CHECK-LE-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-LE-P9-NEXT: lxv v3, 0(r3) +; CHECK-LE-P9-NEXT: xxlxor v4, v4, v4 +; CHECK-LE-P9-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P9-NEXT: stxv v2, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v4i32_v4i32: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI9_0@toc@ha +; CHECK-BE-P8-NEXT: lxsiwzx v2, 0, r3 +; CHECK-BE-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-BE-P8-NEXT: addi r5, r5, .LCPI9_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-BE-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-BE-P8-NEXT: xxlxor v3, v3, v3 +; CHECK-BE-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v4i32_v4i32: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI9_0@toc@ha +; CHECK-BE-P9-NEXT: lxsiwzx v3, 0, r4 +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI9_0@toc@l +; CHECK-BE-P9-NEXT: lxv v4, 0(r3) +; CHECK-BE-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-BE-P9-NEXT: xxlxor v3, v3, v3 +; CHECK-BE-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P9-NEXT: stxv v2, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v4i32_v4i32: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r5, L..C9(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-AIX-64-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-64-P8-NEXT: xxlxor v3, v3, v3 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v4i32_v4i32: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; 
CHECK-AIX-64-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C8(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-64-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-64-P9-NEXT: xxlxor v3, v3, v3 +; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v4i32_v4i32: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r5, L..C8(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-32-P8-NEXT: xxlxor v3, v3, v3 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v4i32_v4i32: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C7(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-32-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-32-P9-NEXT: xxlxor v3, v3, v3 +; CHECK-AIX-32-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P9-NEXT: stxv v2, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i16>, ptr %ptr1, align 1 + %1 = load <2 x i16>, ptr %ptr2, align 1 + %shuffle1 = shufflevector <2 x i16> %0, <2 x i16> %1, <4 x i32> + %2 = zext <4 x i16> %shuffle1 to <4 x i32> + store <4 x i32> %2, ptr undef, align 16 + ret void +} + +define <16 x i8> @test_v4i32_v8i16(ptr %a, ptr %b) local_unnamed_addr { +; CHECK-LE-P8-LABEL: test_v4i32_v8i16: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lhz r3, 0(r3) +; CHECK-LE-P8-NEXT: lfiwzx f0, 0, r4 +; CHECK-LE-P8-NEXT: mtfprd f1, r3 +; CHECK-LE-P8-NEXT: xxswapd v3, f0 +; CHECK-LE-P8-NEXT: xxswapd v2, vs1 +; CHECK-LE-P8-NEXT: vmrglh v2, v2, v3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: 
test_v4i32_v8i16: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r4 +; CHECK-LE-P9-NEXT: xxswapd v3, f0 +; CHECK-LE-P9-NEXT: vsplth v2, v2, 3 +; CHECK-LE-P9-NEXT: vmrglh v2, v2, v3 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v4i32_v8i16: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lfiwzx f0, 0, r4 +; CHECK-BE-P8-NEXT: lhz r3, 0(r3) +; CHECK-BE-P8-NEXT: sldi r3, r3, 48 +; CHECK-BE-P8-NEXT: xxsldwi v2, f0, f0, 1 +; CHECK-BE-P8-NEXT: mtvsrd v3, r3 +; CHECK-BE-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v4i32_v8i16: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-BE-P9-NEXT: lfiwzx f0, 0, r4 +; CHECK-BE-P9-NEXT: xxsldwi v3, f0, f0, 1 +; CHECK-BE-P9-NEXT: vsplth v2, v2, 3 +; CHECK-BE-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v4i32_v8i16: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lfiwzx f0, 0, r4 +; CHECK-AIX-64-P8-NEXT: lhz r3, 0(r3) +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48 +; CHECK-AIX-64-P8-NEXT: xxsldwi v2, f0, f0, 1 +; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v4i32_v8i16: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r4 +; CHECK-AIX-64-P9-NEXT: xxsldwi v3, f0, f0, 1 +; CHECK-AIX-64-P9-NEXT: vsplth v2, v2, 3 +; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: sth r3, -32(r1) +; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r4) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r4 +; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 
0, r3 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v4i32_v8i16: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lhz r3, 0(r3) +; CHECK-AIX-32-P9-NEXT: sth r3, -32(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r4) +; CHECK-AIX-32-P9-NEXT: lxv v2, -32(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1) +; CHECK-AIX-32-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i8>, ptr %a + %bc1 = bitcast <2 x i8> %0 to i16 + %vecinit3 = insertelement <8 x i16> poison, i16 %bc1, i64 0 + %1 = load <2 x i8>, ptr %b, align 4 + %bc2 = bitcast <2 x i8> %1 to i16 + %vecinit6 = insertelement <8 x i16> undef, i16 %bc2, i64 0 + %2 = bitcast <8 x i16> %vecinit3 to <16 x i8> + %3 = bitcast <8 x i16> %vecinit6 to <16 x i8> + %shuffle = shufflevector <16 x i8> %3, <16 x i8> %2, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v4i32_v2i64(ptr %a, ptr %b) local_unnamed_addr { +; CHECK-LE-P8-LABEL: test_v4i32_v2i64: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-LE-P8-NEXT: lfdx f1, 0, r4 +; CHECK-LE-P8-NEXT: xxswapd v2, f0 +; CHECK-LE-P8-NEXT: xxswapd v3, f1 +; CHECK-LE-P8-NEXT: vmrglh v2, v3, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v4i32_v2i64: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r3 +; CHECK-LE-P9-NEXT: xxswapd v2, f0 +; CHECK-LE-P9-NEXT: lfd f0, 0(r4) +; CHECK-LE-P9-NEXT: xxswapd v3, f0 +; CHECK-LE-P9-NEXT: vmrglh v2, v3, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v4i32_v2i64: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-BE-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-BE-P8-NEXT: xxsldwi v2, f0, f0, 1 +; CHECK-BE-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v4i32_v2i64: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lfiwzx f0, 0, r3 +; CHECK-BE-P9-NEXT: lxsd v3, 0(r4) +; 
CHECK-BE-P9-NEXT: xxsldwi v2, f0, f0, 1 +; CHECK-BE-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v4i32_v2i64: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-AIX-64-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: xxsldwi v2, f0, f0, 1 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v4i32_v2i64: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r3 +; CHECK-AIX-64-P9-NEXT: lxsd v3, 0(r4) +; CHECK-AIX-64-P9-NEXT: xxsldwi v2, f0, f0, 1 +; CHECK-AIX-64-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v4i32_v2i64: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r5, L..C9(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v4i32_v2i64: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C8(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i8>, ptr %a, align 4 + %bc1 = bitcast <2 x i8> %0 to i16 + %vecinit3 = insertelement <8 x i16> poison, i16 %bc1, i64 0 + %1 = load <2 x i8>, ptr %b, align 8 + %bc2 = bitcast <2 x i8> %1 to i16 + %vecinit6 = insertelement <8 x i16> undef, i16 %bc2, i64 0 + %2 = bitcast <8 x i16> %vecinit3 to <16 x i8> + %3 = bitcast <8 x i16> %vecinit6 to <16 x i8> + %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> + ret <16 x i8> %shuffle +} + +define void @test_v2i64_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readonly %ptr2) { +; CHECK-LE-P8-LABEL: test_v2i64_v2i64: +; CHECK-LE-P8: # %bb.0: # %entry +; 
CHECK-LE-P8-NEXT: addis r5, r2, .LCPI12_0@toc@ha +; CHECK-LE-P8-NEXT: lxsdx v2, 0, r3 +; CHECK-LE-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI12_1@toc@ha +; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI12_0@toc@l +; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI12_1@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5 +; CHECK-LE-P8-NEXT: xxswapd v4, vs0 +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-LE-P8-NEXT: xxswapd v3, vs0 +; CHECK-LE-P8-NEXT: xxlxor v4, v4, v4 +; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P8-NEXT: xxswapd vs0, v2 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v2i64_v2i64: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsd v2, 0(r3) +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI12_0@toc@ha +; CHECK-LE-P9-NEXT: lxsd v3, 0(r4) +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI12_0@toc@l +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI12_1@toc@ha +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI12_1@toc@l +; CHECK-LE-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-LE-P9-NEXT: lxv v3, 0(r3) +; CHECK-LE-P9-NEXT: xxlxor v4, v4, v4 +; CHECK-LE-P9-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P9-NEXT: stxv v2, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v2i64_v2i64: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI12_0@toc@ha +; CHECK-BE-P8-NEXT: lxsdx v2, 0, r3 +; CHECK-BE-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-BE-P8-NEXT: addi r5, r5, .LCPI12_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-BE-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-BE-P8-NEXT: xxlxor v3, v3, v3 +; CHECK-BE-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v2i64_v2i64: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsd v2, 0(r3) +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI12_0@toc@ha +; CHECK-BE-P9-NEXT: lxsd v3, 0(r4) +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI12_0@toc@l +; 
CHECK-BE-P9-NEXT: lxv v4, 0(r3) +; CHECK-BE-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-BE-P9-NEXT: xxlxor v3, v3, v3 +; CHECK-BE-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P9-NEXT: stxv v2, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v2i64_v2i64: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r5, L..C10(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: lxsdx v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-AIX-64-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-64-P8-NEXT: xxlxor v3, v3, v3 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v2i64_v2i64: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsd v2, 0(r3) +; CHECK-AIX-64-P9-NEXT: ld r3, L..C9(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: lxsd v3, 0(r4) +; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-64-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-64-P9-NEXT: xxlxor v3, v3, v3 +; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v2i64_v2i64: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r5, L..C10(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-32-P8-NEXT: xxlxor v3, v3, v3 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v2i64_v2i64: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C9(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-32-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-32-P9-NEXT: xxlxor v3, v3, v3 +; CHECK-AIX-32-P9-NEXT: vmrghh v2, 
v3, v2 +; CHECK-AIX-32-P9-NEXT: stxv v2, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <4 x i16>, ptr %ptr1, align 1 + %1 = load <4 x i16>, ptr %ptr2, align 1 + %shuffle1 = shufflevector <4 x i16> %0, <4 x i16> %1, <4 x i32> + %2 = zext <4 x i16> %shuffle1 to <4 x i32> + store <4 x i32> %2, ptr undef, align 16 + ret void +} + +define <16 x i8> @test_v2i64_v4i32(ptr %a, ptr %b) local_unnamed_addr { +; CHECK-LE-P8-LABEL: test_v2i64_v4i32: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-LE-P8-NEXT: lfdx f1, 0, r4 +; CHECK-LE-P8-NEXT: xxswapd v2, f0 +; CHECK-LE-P8-NEXT: xxswapd v3, f1 +; CHECK-LE-P8-NEXT: vmrglh v2, v2, v3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v2i64_v4i32: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r3 +; CHECK-LE-P9-NEXT: xxswapd v2, f0 +; CHECK-LE-P9-NEXT: lfd f0, 0(r4) +; CHECK-LE-P9-NEXT: xxswapd v3, f0 +; CHECK-LE-P9-NEXT: vmrglh v2, v2, v3 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v2i64_v4i32: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-BE-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-BE-P8-NEXT: xxsldwi v2, f0, f0, 1 +; CHECK-BE-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v2i64_v4i32: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lfiwzx f0, 0, r3 +; CHECK-BE-P9-NEXT: lxsd v3, 0(r4) +; CHECK-BE-P9-NEXT: xxsldwi v2, f0, f0, 1 +; CHECK-BE-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v2i64_v4i32: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-AIX-64-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: xxsldwi v2, f0, f0, 1 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v2i64_v4i32: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r3 +; CHECK-AIX-64-P9-NEXT: lxsd v3, 0(r4) +; CHECK-AIX-64-P9-NEXT: xxsldwi v2, f0, f0, 1 
+; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v2i64_v4i32: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r5, L..C11(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v2i64_v4i32: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C10(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-32-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i8>, ptr %a, align 4 + %bc1 = bitcast <2 x i8> %0 to i16 + %vecinit3 = insertelement <8 x i16> poison, i16 %bc1, i64 0 + %1 = load <2 x i8>, ptr %b, align 8 + %bc2 = bitcast <2 x i8> %1 to i16 + %vecinit6 = insertelement <8 x i16> undef, i16 %bc2, i64 0 + %2 = bitcast <8 x i16> %vecinit3 to <16 x i8> + %3 = bitcast <8 x i16> %vecinit6 to <16 x i8> + %shuffle = shufflevector <16 x i8> %3, <16 x i8> %2, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v2i64_v8i16(ptr %a, ptr %b) local_unnamed_addr { +; CHECK-LE-P8-LABEL: test_v2i64_v8i16: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lhz r3, 0(r3) +; CHECK-LE-P8-NEXT: lfdx f0, 0, r4 +; CHECK-LE-P8-NEXT: mtfprd f1, r3 +; CHECK-LE-P8-NEXT: xxswapd v3, f0 +; CHECK-LE-P8-NEXT: xxswapd v2, vs1 +; CHECK-LE-P8-NEXT: vmrglh v2, v2, v3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v2i64_v8i16: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-LE-P9-NEXT: lfd f0, 0(r4) +; CHECK-LE-P9-NEXT: xxswapd v3, f0 +; CHECK-LE-P9-NEXT: vsplth v2, v2, 3 +; CHECK-LE-P9-NEXT: vmrglh v2, v2, v3 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v2i64_v8i16: +; CHECK-BE-P8: # %bb.0: # %entry +; 
CHECK-BE-P8-NEXT: lhz r3, 0(r3) +; CHECK-BE-P8-NEXT: lxsdx v2, 0, r4 +; CHECK-BE-P8-NEXT: sldi r3, r3, 48 +; CHECK-BE-P8-NEXT: mtvsrd v3, r3 +; CHECK-BE-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v2i64_v8i16: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-BE-P9-NEXT: lxsd v3, 0(r4) +; CHECK-BE-P9-NEXT: vsplth v2, v2, 3 +; CHECK-BE-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v2i64_v8i16: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lhz r3, 0(r3) +; CHECK-AIX-64-P8-NEXT: lxsdx v2, 0, r4 +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48 +; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v2i64_v8i16: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-AIX-64-P9-NEXT: lxsd v3, 0(r4) +; CHECK-AIX-64-P9-NEXT: vsplth v2, v2, 3 +; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v2i64_v8i16: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: sth r3, -32(r1) +; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r4) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r4 +; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v2i64_v8i16: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lhz r3, 0(r3) +; CHECK-AIX-32-P9-NEXT: sth r3, -32(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r4) +; CHECK-AIX-32-P9-NEXT: lxv v2, -32(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1) +; CHECK-AIX-32-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i8>, ptr %a + %bc1 = bitcast <2 
x i8> %0 to i16 + %vecinit3 = insertelement <8 x i16> poison, i16 %bc1, i64 0 + %1 = load <2 x i8>, ptr %b, align 8 + %bc2 = bitcast <2 x i8> %1 to i16 + %vecinit6 = insertelement <8 x i16> undef, i16 %bc2, i64 0 + %2 = bitcast <8 x i16> %vecinit3 to <16 x i8> + %3 = bitcast <8 x i16> %vecinit6 to <16 x i8> + %shuffle = shufflevector <16 x i8> %3, <16 x i8> %2, <16 x i32> + ret <16 x i8> %shuffle +} diff --git a/llvm/test/CodeGen/RISCV/double-select-fcmp.ll b/llvm/test/CodeGen/RISCV/double-select-fcmp.ll --- a/llvm/test/CodeGen/RISCV/double-select-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/double-select-fcmp.ll @@ -241,3 +241,40 @@ %2 = select i1 %1, i32 %c, i32 %d ret i32 %2 } + +define i32 @select_fcmp_oeq_1_2(double %a, double %b) { +; CHECK-LABEL: select_fcmp_oeq_1_2: +; CHECK: # %bb.0: +; CHECK-NEXT: feq.d a0, fa0, fa1 +; CHECK-NEXT: li a1, 2 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: ret + %1 = fcmp fast oeq double %a, %b + %2 = select i1 %1, i32 1, i32 2 + ret i32 %2 +} + +define signext i32 @select_fcmp_uge_negone_zero(double %a, double %b) nounwind { +; CHECK-LABEL: select_fcmp_uge_negone_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: fle.d a0, fa0, fa1 +; CHECK-NEXT: xori a0, a0, 1 +; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: ret + %1 = fcmp ugt double %a, %b + %2 = select i1 %1, i32 -1, i32 0 + ret i32 %2 +} + +define signext i32 @select_fcmp_uge_1_2(double %a, double %b) nounwind { +; CHECK-LABEL: select_fcmp_uge_1_2: +; CHECK: # %bb.0: +; CHECK-NEXT: fle.d a0, fa0, fa1 +; CHECK-NEXT: xori a0, a0, 1 +; CHECK-NEXT: li a1, 2 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: ret + %1 = fcmp ugt double %a, %b + %2 = select i1 %1, i32 1, i32 2 + ret i32 %2 +} diff --git a/llvm/test/CodeGen/RISCV/float-select-fcmp.ll b/llvm/test/CodeGen/RISCV/float-select-fcmp.ll --- a/llvm/test/CodeGen/RISCV/float-select-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/float-select-fcmp.ll @@ -241,3 +241,40 @@ %2 = select i1 %1, i32 %c, i32 %d ret i32 %2 } + +define i32 @select_fcmp_oeq_1_2(float %a, 
float %b) { +; CHECK-LABEL: select_fcmp_oeq_1_2: +; CHECK: # %bb.0: +; CHECK-NEXT: feq.s a0, fa0, fa1 +; CHECK-NEXT: li a1, 2 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: ret + %1 = fcmp fast oeq float %a, %b + %2 = select i1 %1, i32 1, i32 2 + ret i32 %2 +} + +define signext i32 @select_fcmp_uge_negone_zero(float %a, float %b) nounwind { +; CHECK-LABEL: select_fcmp_uge_negone_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: fle.s a0, fa0, fa1 +; CHECK-NEXT: xori a0, a0, 1 +; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: ret + %1 = fcmp ugt float %a, %b + %2 = select i1 %1, i32 -1, i32 0 + ret i32 %2 +} + +define signext i32 @select_fcmp_uge_1_2(float %a, float %b) nounwind { +; CHECK-LABEL: select_fcmp_uge_1_2: +; CHECK: # %bb.0: +; CHECK-NEXT: fle.s a0, fa0, fa1 +; CHECK-NEXT: xori a0, a0, 1 +; CHECK-NEXT: li a1, 2 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: ret + %1 = fcmp ugt float %a, %b + %2 = select i1 %1, i32 1, i32 2 + ret i32 %2 +} diff --git a/llvm/test/CodeGen/RISCV/get-setcc-result-type.ll b/llvm/test/CodeGen/RISCV/get-setcc-result-type.ll --- a/llvm/test/CodeGen/RISCV/get-setcc-result-type.ll +++ b/llvm/test/CodeGen/RISCV/get-setcc-result-type.ll @@ -9,14 +9,14 @@ ; RV32I-NEXT: lw a2, 8(a0) ; RV32I-NEXT: lw a3, 4(a0) ; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: seqz a1, a1 -; RV32I-NEXT: seqz a2, a2 -; RV32I-NEXT: seqz a3, a3 -; RV32I-NEXT: seqz a4, a4 -; RV32I-NEXT: neg a4, a4 -; RV32I-NEXT: neg a3, a3 -; RV32I-NEXT: neg a2, a2 -; RV32I-NEXT: neg a1, a1 +; RV32I-NEXT: snez a1, a1 +; RV32I-NEXT: snez a2, a2 +; RV32I-NEXT: snez a3, a3 +; RV32I-NEXT: snez a4, a4 +; RV32I-NEXT: addi a4, a4, -1 +; RV32I-NEXT: addi a3, a3, -1 +; RV32I-NEXT: addi a2, a2, -1 +; RV32I-NEXT: addi a1, a1, -1 ; RV32I-NEXT: sw a1, 12(a0) ; RV32I-NEXT: sw a2, 8(a0) ; RV32I-NEXT: sw a3, 4(a0) diff --git a/llvm/test/CodeGen/RISCV/half-select-fcmp.ll b/llvm/test/CodeGen/RISCV/half-select-fcmp.ll --- a/llvm/test/CodeGen/RISCV/half-select-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/half-select-fcmp.ll @@ 
-241,3 +241,40 @@ %2 = select i1 %1, i32 %c, i32 %d ret i32 %2 } + +define i32 @select_fcmp_oeq_1_2(half %a, half %b) { +; CHECK-LABEL: select_fcmp_oeq_1_2: +; CHECK: # %bb.0: +; CHECK-NEXT: feq.h a0, fa0, fa1 +; CHECK-NEXT: li a1, 2 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: ret + %1 = fcmp fast oeq half %a, %b + %2 = select i1 %1, i32 1, i32 2 + ret i32 %2 +} + +define signext i32 @select_fcmp_uge_negone_zero(half %a, half %b) nounwind { +; CHECK-LABEL: select_fcmp_uge_negone_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: fle.h a0, fa0, fa1 +; CHECK-NEXT: xori a0, a0, 1 +; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: ret + %1 = fcmp ugt half %a, %b + %2 = select i1 %1, i32 -1, i32 0 + ret i32 %2 +} + +define signext i32 @select_fcmp_uge_1_2(half %a, half %b) nounwind { +; CHECK-LABEL: select_fcmp_uge_1_2: +; CHECK: # %bb.0: +; CHECK-NEXT: fle.h a0, fa0, fa1 +; CHECK-NEXT: xori a0, a0, 1 +; CHECK-NEXT: li a1, 2 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: ret + %1 = fcmp ugt half %a, %b + %2 = select i1 %1, i32 1, i32 2 + ret i32 %2 +} diff --git a/llvm/test/CodeGen/RISCV/iabs.ll b/llvm/test/CodeGen/RISCV/iabs.ll --- a/llvm/test/CodeGen/RISCV/iabs.ll +++ b/llvm/test/CodeGen/RISCV/iabs.ll @@ -727,9 +727,9 @@ ; ; RV64ZBB-LABEL: zext_abs32: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: sext.w a0, a0 -; RV64ZBB-NEXT: negw a1, a0 -; RV64ZBB-NEXT: max a0, a0, a1 +; RV64ZBB-NEXT: sext.w a1, a0 +; RV64ZBB-NEXT: negw a0, a0 +; RV64ZBB-NEXT: max a0, a1, a0 ; RV64ZBB-NEXT: ret ; ; RV64ZBT-LABEL: zext_abs32: diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll --- a/llvm/test/CodeGen/RISCV/rv64zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll @@ -9,8 +9,7 @@ define signext i32 @ctlz_i32(i32 signext %a) nounwind { ; RV64I-LABEL: ctlz_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: beqz a1, .LBB0_2 +; RV64I-NEXT: beqz a0, .LBB0_2 ; RV64I-NEXT: # %bb.1: # %cond.false ; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill @@ 
-63,8 +62,7 @@ define signext i32 @log2_i32(i32 signext %a) nounwind { ; RV64I-LABEL: log2_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: beqz a1, .LBB1_2 +; RV64I-NEXT: beqz a0, .LBB1_2 ; RV64I-NEXT: # %bb.1: # %cond.false ; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill @@ -368,34 +366,34 @@ define signext i32 @cttz_i32(i32 signext %a) nounwind { ; RV64I-LABEL: cttz_i32: ; RV64I: # %bb.0: +; RV64I-NEXT: beqz a0, .LBB6_4 +; RV64I-NEXT: # %bb.1: # %cond.false ; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: sext.w s0, a0 -; RV64I-NEXT: beqz s0, .LBB6_3 -; RV64I-NEXT: # %bb.1: # %cond.false -; RV64I-NEXT: neg a1, a0 -; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: neg a0, a0 +; RV64I-NEXT: and a0, s0, a0 ; RV64I-NEXT: lui a1, 30667 ; RV64I-NEXT: addiw a1, a1, 1329 ; RV64I-NEXT: call __muldi3@plt ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: li a0, 32 -; RV64I-NEXT: beqz s0, .LBB6_4 +; RV64I-NEXT: beqz s0, .LBB6_3 ; RV64I-NEXT: # %bb.2: # %cond.false ; RV64I-NEXT: srliw a0, a1, 27 ; RV64I-NEXT: lui a1, %hi(.LCPI6_0) ; RV64I-NEXT: addi a1, a1, %lo(.LCPI6_0) ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: j .LBB6_4 -; RV64I-NEXT: .LBB6_3: -; RV64I-NEXT: li a0, 32 -; RV64I-NEXT: .LBB6_4: # %cond.end +; RV64I-NEXT: .LBB6_3: # %cond.false ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; RV64I-NEXT: .LBB6_4: +; RV64I-NEXT: li a0, 32 +; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: cttz_i32: ; RV64ZBB: # %bb.0: @@ -928,7 +926,7 @@ define signext i32 @abs_i32_sext(i32 signext %x) { ; RV64I-LABEL: abs_i32_sext: ; RV64I: # %bb.0: -; RV64I-NEXT: sraiw a1, a0, 31 +; RV64I-NEXT: srai a1, a0, 31 ; RV64I-NEXT: xor a0, a0, a1 ; RV64I-NEXT: subw a0, a0, a1 ; RV64I-NEXT: ret diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll @@ -1725,17 +1725,10 @@ ; ; RV64ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i32: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: addi sp, sp, -16 -; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 16 -; RV64ZVE32F-NEXT: sw a1, 12(sp) -; RV64ZVE32F-NEXT: sw a0, 8(sp) -; RV64ZVE32F-NEXT: addi a0, sp, 12 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu -; RV64ZVE32F-NEXT: vle32.v v9, (a0) -; RV64ZVE32F-NEXT: addi a0, sp, 8 -; RV64ZVE32F-NEXT: vle32.v v8, (a0) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, mu -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu +; RV64ZVE32F-NEXT: vmv.v.x v8, a1 +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, tu, mu +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, mu ; RV64ZVE32F-NEXT: vmv.x.s a0, v0 ; RV64ZVE32F-NEXT: andi a1, a0, 1 @@ -1744,7 +1737,6 @@ ; RV64ZVE32F-NEXT: andi a0, a0, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB24_4 ; RV64ZVE32F-NEXT: .LBB24_2: # %else2 -; RV64ZVE32F-NEXT: addi sp, sp, 16 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB24_3: # %cond.store ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu @@ -1755,7 +1747,6 @@ ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vse32.v v8, (a3) -; RV64ZVE32F-NEXT: addi sp, sp, 16 ; RV64ZVE32F-NEXT: ret %tval = trunc <2 x i64> %val to <2 x i32> call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %tval, <2 x i32*> %ptrs, i32 4, <2 x i1> %m) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll @@ -123,8 +123,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %v) ret i1 %red @@ -152,8 +152,8 @@ ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %v) ret i1 %red @@ -166,8 +166,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umax.v2i1(<2 x i1> %v) ret i1 %red @@ -181,8 +181,8 @@ ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smax.v2i1(<2 x i1> %v) ret i1 %red @@ -196,8 +196,8 @@ ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umin.v2i1(<2 x i1> %v) ret i1 %red @@ -210,8 +210,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smin.v2i1(<2 x i1> %v) ret i1 %red @@ -224,8 +224,8 @@ ; 
CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %v) ret i1 %red @@ -253,8 +253,8 @@ ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %v) ret i1 %red @@ -267,8 +267,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umax.v4i1(<4 x i1> %v) ret i1 %red @@ -282,8 +282,8 @@ ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smax.v4i1(<4 x i1> %v) ret i1 %red @@ -297,8 +297,8 @@ ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umin.v4i1(<4 x i1> %v) ret i1 %red @@ -311,8 +311,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smin.v4i1(<4 x i1> %v) ret i1 %red @@ -325,8 +325,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vcpop.m 
a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %v) ret i1 %red @@ -354,8 +354,8 @@ ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %v) ret i1 %red @@ -368,8 +368,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umax.v8i1(<8 x i1> %v) ret i1 %red @@ -383,8 +383,8 @@ ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smax.v8i1(<8 x i1> %v) ret i1 %red @@ -398,8 +398,8 @@ ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umin.v8i1(<8 x i1> %v) ret i1 %red @@ -412,8 +412,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smin.v8i1(<8 x i1> %v) ret i1 %red @@ -426,8 +426,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: seqz a0, 
a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %v) ret i1 %red @@ -455,8 +455,8 @@ ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %v) ret i1 %red @@ -469,8 +469,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umax.v16i1(<16 x i1> %v) ret i1 %red @@ -484,8 +484,8 @@ ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smax.v16i1(<16 x i1> %v) ret i1 %red @@ -499,8 +499,8 @@ ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umin.v16i1(<16 x i1> %v) ret i1 %red @@ -513,8 +513,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smin.v16i1(<16 x i1> %v) ret i1 %red @@ -528,8 +528,8 @@ ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-NEXT: vmor.mm v8, v0, v8 ; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: snez a0, a0 -; LMULMAX1-NEXT: neg a0, a0 +; LMULMAX1-NEXT: seqz a0, a0 +; LMULMAX1-NEXT: addi a0, a0, -1 ; 
LMULMAX1-NEXT: ret ; ; LMULMAX8-LABEL: vreduce_or_v32i1: @@ -537,8 +537,8 @@ ; LMULMAX8-NEXT: li a0, 32 ; LMULMAX8-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; LMULMAX8-NEXT: vcpop.m a0, v0 -; LMULMAX8-NEXT: snez a0, a0 -; LMULMAX8-NEXT: neg a0, a0 +; LMULMAX8-NEXT: seqz a0, a0 +; LMULMAX8-NEXT: addi a0, a0, -1 ; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> %v) ret i1 %red @@ -576,8 +576,8 @@ ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-NEXT: vmnand.mm v8, v0, v8 ; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: seqz a0, a0 -; LMULMAX1-NEXT: neg a0, a0 +; LMULMAX1-NEXT: snez a0, a0 +; LMULMAX1-NEXT: addi a0, a0, -1 ; LMULMAX1-NEXT: ret ; ; LMULMAX8-LABEL: vreduce_and_v32i1: @@ -586,8 +586,8 @@ ; LMULMAX8-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; LMULMAX8-NEXT: vmnot.m v8, v0 ; LMULMAX8-NEXT: vcpop.m a0, v8 -; LMULMAX8-NEXT: seqz a0, a0 -; LMULMAX8-NEXT: neg a0, a0 +; LMULMAX8-NEXT: snez a0, a0 +; LMULMAX8-NEXT: addi a0, a0, -1 ; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %v) ret i1 %red @@ -601,8 +601,8 @@ ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-NEXT: vmor.mm v8, v0, v8 ; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: snez a0, a0 -; LMULMAX1-NEXT: neg a0, a0 +; LMULMAX1-NEXT: seqz a0, a0 +; LMULMAX1-NEXT: addi a0, a0, -1 ; LMULMAX1-NEXT: ret ; ; LMULMAX8-LABEL: vreduce_umax_v32i1: @@ -610,8 +610,8 @@ ; LMULMAX8-NEXT: li a0, 32 ; LMULMAX8-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; LMULMAX8-NEXT: vcpop.m a0, v0 -; LMULMAX8-NEXT: snez a0, a0 -; LMULMAX8-NEXT: neg a0, a0 +; LMULMAX8-NEXT: seqz a0, a0 +; LMULMAX8-NEXT: addi a0, a0, -1 ; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.umax.v32i1(<32 x i1> %v) ret i1 %red @@ -625,8 +625,8 @@ ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-NEXT: vmnand.mm v8, v0, v8 ; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: seqz a0, a0 -; LMULMAX1-NEXT: neg a0, a0 +; LMULMAX1-NEXT: snez a0, a0 +; LMULMAX1-NEXT: addi a0, 
a0, -1 ; LMULMAX1-NEXT: ret ; ; LMULMAX8-LABEL: vreduce_smax_v32i1: @@ -635,8 +635,8 @@ ; LMULMAX8-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; LMULMAX8-NEXT: vmnot.m v8, v0 ; LMULMAX8-NEXT: vcpop.m a0, v8 -; LMULMAX8-NEXT: seqz a0, a0 -; LMULMAX8-NEXT: neg a0, a0 +; LMULMAX8-NEXT: snez a0, a0 +; LMULMAX8-NEXT: addi a0, a0, -1 ; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.smax.v32i1(<32 x i1> %v) ret i1 %red @@ -650,8 +650,8 @@ ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-NEXT: vmnand.mm v8, v0, v8 ; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: seqz a0, a0 -; LMULMAX1-NEXT: neg a0, a0 +; LMULMAX1-NEXT: snez a0, a0 +; LMULMAX1-NEXT: addi a0, a0, -1 ; LMULMAX1-NEXT: ret ; ; LMULMAX8-LABEL: vreduce_umin_v32i1: @@ -660,8 +660,8 @@ ; LMULMAX8-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; LMULMAX8-NEXT: vmnot.m v8, v0 ; LMULMAX8-NEXT: vcpop.m a0, v8 -; LMULMAX8-NEXT: seqz a0, a0 -; LMULMAX8-NEXT: neg a0, a0 +; LMULMAX8-NEXT: snez a0, a0 +; LMULMAX8-NEXT: addi a0, a0, -1 ; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.umin.v32i1(<32 x i1> %v) ret i1 %red @@ -675,8 +675,8 @@ ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-NEXT: vmor.mm v8, v0, v8 ; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: snez a0, a0 -; LMULMAX1-NEXT: neg a0, a0 +; LMULMAX1-NEXT: seqz a0, a0 +; LMULMAX1-NEXT: addi a0, a0, -1 ; LMULMAX1-NEXT: ret ; ; LMULMAX8-LABEL: vreduce_smin_v32i1: @@ -684,8 +684,8 @@ ; LMULMAX8-NEXT: li a0, 32 ; LMULMAX8-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; LMULMAX8-NEXT: vcpop.m a0, v0 -; LMULMAX8-NEXT: snez a0, a0 -; LMULMAX8-NEXT: neg a0, a0 +; LMULMAX8-NEXT: seqz a0, a0 +; LMULMAX8-NEXT: addi a0, a0, -1 ; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.smin.v32i1(<32 x i1> %v) ret i1 %red @@ -701,8 +701,8 @@ ; LMULMAX1-NEXT: vmor.mm v9, v0, v9 ; LMULMAX1-NEXT: vmor.mm v8, v9, v8 ; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: snez a0, a0 -; LMULMAX1-NEXT: neg a0, a0 +; LMULMAX1-NEXT: seqz a0, a0 +; LMULMAX1-NEXT: addi 
a0, a0, -1 ; LMULMAX1-NEXT: ret ; ; LMULMAX8-LABEL: vreduce_or_v64i1: @@ -710,8 +710,8 @@ ; LMULMAX8-NEXT: li a0, 64 ; LMULMAX8-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; LMULMAX8-NEXT: vcpop.m a0, v0 -; LMULMAX8-NEXT: snez a0, a0 -; LMULMAX8-NEXT: neg a0, a0 +; LMULMAX8-NEXT: seqz a0, a0 +; LMULMAX8-NEXT: addi a0, a0, -1 ; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> %v) ret i1 %red @@ -753,8 +753,8 @@ ; LMULMAX1-NEXT: vmand.mm v9, v0, v9 ; LMULMAX1-NEXT: vmnand.mm v8, v9, v8 ; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: seqz a0, a0 -; LMULMAX1-NEXT: neg a0, a0 +; LMULMAX1-NEXT: snez a0, a0 +; LMULMAX1-NEXT: addi a0, a0, -1 ; LMULMAX1-NEXT: ret ; ; LMULMAX8-LABEL: vreduce_and_v64i1: @@ -763,8 +763,8 @@ ; LMULMAX8-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; LMULMAX8-NEXT: vmnot.m v8, v0 ; LMULMAX8-NEXT: vcpop.m a0, v8 -; LMULMAX8-NEXT: seqz a0, a0 -; LMULMAX8-NEXT: neg a0, a0 +; LMULMAX8-NEXT: snez a0, a0 +; LMULMAX8-NEXT: addi a0, a0, -1 ; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> %v) ret i1 %red @@ -780,8 +780,8 @@ ; LMULMAX1-NEXT: vmor.mm v9, v0, v9 ; LMULMAX1-NEXT: vmor.mm v8, v9, v8 ; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: snez a0, a0 -; LMULMAX1-NEXT: neg a0, a0 +; LMULMAX1-NEXT: seqz a0, a0 +; LMULMAX1-NEXT: addi a0, a0, -1 ; LMULMAX1-NEXT: ret ; ; LMULMAX8-LABEL: vreduce_umax_v64i1: @@ -789,8 +789,8 @@ ; LMULMAX8-NEXT: li a0, 64 ; LMULMAX8-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; LMULMAX8-NEXT: vcpop.m a0, v0 -; LMULMAX8-NEXT: snez a0, a0 -; LMULMAX8-NEXT: neg a0, a0 +; LMULMAX8-NEXT: seqz a0, a0 +; LMULMAX8-NEXT: addi a0, a0, -1 ; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.umax.v64i1(<64 x i1> %v) ret i1 %red @@ -806,8 +806,8 @@ ; LMULMAX1-NEXT: vmand.mm v9, v0, v9 ; LMULMAX1-NEXT: vmnand.mm v8, v9, v8 ; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: seqz a0, a0 -; LMULMAX1-NEXT: neg a0, a0 +; LMULMAX1-NEXT: snez a0, a0 +; LMULMAX1-NEXT: addi a0, a0, -1 ; LMULMAX1-NEXT: ret ; ; 
LMULMAX8-LABEL: vreduce_smax_v64i1: @@ -816,8 +816,8 @@ ; LMULMAX8-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; LMULMAX8-NEXT: vmnot.m v8, v0 ; LMULMAX8-NEXT: vcpop.m a0, v8 -; LMULMAX8-NEXT: seqz a0, a0 -; LMULMAX8-NEXT: neg a0, a0 +; LMULMAX8-NEXT: snez a0, a0 +; LMULMAX8-NEXT: addi a0, a0, -1 ; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.smax.v64i1(<64 x i1> %v) ret i1 %red @@ -833,8 +833,8 @@ ; LMULMAX1-NEXT: vmand.mm v9, v0, v9 ; LMULMAX1-NEXT: vmnand.mm v8, v9, v8 ; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: seqz a0, a0 -; LMULMAX1-NEXT: neg a0, a0 +; LMULMAX1-NEXT: snez a0, a0 +; LMULMAX1-NEXT: addi a0, a0, -1 ; LMULMAX1-NEXT: ret ; ; LMULMAX8-LABEL: vreduce_umin_v64i1: @@ -843,8 +843,8 @@ ; LMULMAX8-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; LMULMAX8-NEXT: vmnot.m v8, v0 ; LMULMAX8-NEXT: vcpop.m a0, v8 -; LMULMAX8-NEXT: seqz a0, a0 -; LMULMAX8-NEXT: neg a0, a0 +; LMULMAX8-NEXT: snez a0, a0 +; LMULMAX8-NEXT: addi a0, a0, -1 ; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.umin.v64i1(<64 x i1> %v) ret i1 %red @@ -860,8 +860,8 @@ ; LMULMAX1-NEXT: vmor.mm v9, v0, v9 ; LMULMAX1-NEXT: vmor.mm v8, v9, v8 ; LMULMAX1-NEXT: vcpop.m a0, v8 -; LMULMAX1-NEXT: snez a0, a0 -; LMULMAX1-NEXT: neg a0, a0 +; LMULMAX1-NEXT: seqz a0, a0 +; LMULMAX1-NEXT: addi a0, a0, -1 ; LMULMAX1-NEXT: ret ; ; LMULMAX8-LABEL: vreduce_smin_v64i1: @@ -869,8 +869,8 @@ ; LMULMAX8-NEXT: li a0, 64 ; LMULMAX8-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; LMULMAX8-NEXT: vcpop.m a0, v0 -; LMULMAX8-NEXT: snez a0, a0 -; LMULMAX8-NEXT: neg a0, a0 +; LMULMAX8-NEXT: seqz a0, a0 +; LMULMAX8-NEXT: addi a0, a0, -1 ; LMULMAX8-NEXT: ret %red = call i1 @llvm.vector.reduce.smin.v64i1(<64 x i1> %v) ret i1 %red diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll @@ -9,8 +9,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, 
mu ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.or.nxv1i1( %v) ret i1 %red @@ -38,8 +38,8 @@ ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, mu ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.and.nxv1i1( %v) ret i1 %red @@ -52,8 +52,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, mu ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umax.nxv1i1( %v) ret i1 %red @@ -67,8 +67,8 @@ ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, mu ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smax.nxv1i1( %v) ret i1 %red @@ -82,8 +82,8 @@ ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, mu ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umin.nxv1i1( %v) ret i1 %red @@ -96,8 +96,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, mu ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smin.nxv1i1( %v) ret i1 %red @@ -110,8 +110,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: addi 
a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.or.nxv2i1( %v) ret i1 %red @@ -139,8 +139,8 @@ ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.and.nxv2i1( %v) ret i1 %red @@ -153,8 +153,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umax.nxv2i1( %v) ret i1 %red @@ -168,8 +168,8 @@ ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smax.nxv2i1( %v) ret i1 %red @@ -183,8 +183,8 @@ ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umin.nxv2i1( %v) ret i1 %red @@ -197,8 +197,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smin.nxv2i1( %v) ret i1 %red @@ -211,8 +211,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, mu ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.or.nxv4i1( %v) ret i1 %red @@ -240,8 +240,8 @@ ; 
CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, mu ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.and.nxv4i1( %v) ret i1 %red @@ -254,8 +254,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, mu ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umax.nxv4i1( %v) ret i1 %red @@ -269,8 +269,8 @@ ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, mu ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smax.nxv4i1( %v) ret i1 %red @@ -284,8 +284,8 @@ ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, mu ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umin.nxv4i1( %v) ret i1 %red @@ -298,8 +298,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, mu ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smin.nxv4i1( %v) ret i1 %red @@ -312,8 +312,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, mu ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.or.nxv8i1( %v) ret i1 %red @@ -341,8 +341,8 @@ ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, mu ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: seqz 
a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.and.nxv8i1( %v) ret i1 %red @@ -355,8 +355,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, mu ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umax.nxv8i1( %v) ret i1 %red @@ -370,8 +370,8 @@ ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, mu ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smax.nxv8i1( %v) ret i1 %red @@ -385,8 +385,8 @@ ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, mu ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umin.nxv8i1( %v) ret i1 %red @@ -399,8 +399,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, mu ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smin.nxv8i1( %v) ret i1 %red @@ -413,8 +413,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, mu ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.or.nxv16i1( %v) ret i1 %red @@ -442,8 +442,8 @@ ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, mu ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 
@llvm.vector.reduce.and.nxv16i1( %v) ret i1 %red @@ -456,8 +456,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, mu ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umax.nxv16i1( %v) ret i1 %red @@ -471,8 +471,8 @@ ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, mu ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smax.nxv16i1( %v) ret i1 %red @@ -486,8 +486,8 @@ ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, mu ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umin.nxv16i1( %v) ret i1 %red @@ -500,8 +500,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, mu ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smin.nxv16i1( %v) ret i1 %red @@ -514,8 +514,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, mu ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.or.nxv32i1( %v) ret i1 %red @@ -543,8 +543,8 @@ ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, mu ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.and.nxv32i1( %v) ret i1 %red @@ -557,8 +557,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, 
e8, m4, ta, mu ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umax.nxv32i1( %v) ret i1 %red @@ -572,8 +572,8 @@ ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, mu ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smax.nxv32i1( %v) ret i1 %red @@ -587,8 +587,8 @@ ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, mu ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umin.nxv32i1( %v) ret i1 %red @@ -601,8 +601,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, mu ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smin.nxv32i1( %v) ret i1 %red @@ -615,8 +615,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, mu ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.or.nxv64i1( %v) ret i1 %red @@ -644,8 +644,8 @@ ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, mu ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.and.nxv64i1( %v) ret i1 %red @@ -658,8 +658,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, mu ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: seqz a0, 
a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umax.nxv64i1( %v) ret i1 %red @@ -673,8 +673,8 @@ ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, mu ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smax.nxv64i1( %v) ret i1 %red @@ -688,8 +688,8 @@ ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, mu ; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vcpop.m a0, v8 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.umin.nxv64i1( %v) ret i1 %red @@ -702,8 +702,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, mu ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: ret %red = call i1 @llvm.vector.reduce.smin.nxv64i1( %v) ret i1 %red diff --git a/llvm/test/CodeGen/RISCV/select-const.ll b/llvm/test/CodeGen/RISCV/select-const.ll --- a/llvm/test/CodeGen/RISCV/select-const.ll +++ b/llvm/test/CodeGen/RISCV/select-const.ll @@ -1,20 +1,20 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -target-abi=ilp32 -verify-machineinstrs < %s \ -; RUN: | FileCheck -check-prefix=RV32I %s +; RUN: | FileCheck -check-prefixes=RV32,RV32I %s ; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi=ilp32 -verify-machineinstrs < %s \ -; RUN: | FileCheck -check-prefix=RV32IF %s +; RUN: | FileCheck -check-prefixes=RV32,RV32IF %s ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbt -target-abi=ilp32 -verify-machineinstrs < %s \ -; RUN: | FileCheck -check-prefix=RV32IBT %s +; RUN: | FileCheck -check-prefixes=RV32,RV32IBT %s ; RUN: llc -mtriple=riscv32 -mattr=+f,+experimental-zbt -target-abi=ilp32 -verify-machineinstrs < %s \ -; 
RUN: | FileCheck -check-prefix=RV32IFBT %s +; RUN: | FileCheck -check-prefixes=RV32,RV32IFBT %s ; RUN: llc -mtriple=riscv64 -target-abi=lp64 -verify-machineinstrs < %s \ -; RUN: | FileCheck -check-prefix=RV64I %s +; RUN: | FileCheck -check-prefixes=RV64,RV64I %s ; RUN: llc -mtriple=riscv64 -mattr=+f,+d -target-abi=lp64 -verify-machineinstrs < %s \ -; RUN: | FileCheck -check-prefix=RV64IFD %s +; RUN: | FileCheck -check-prefixes=RV64,RV64IFD %s ; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbt -target-abi=lp64 -verify-machineinstrs < %s \ -; RUN: | FileCheck -check-prefix=RV64IBT %s +; RUN: | FileCheck -check-prefixes=RV64,RV64IBT %s ; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+experimental-zbt -target-abi=lp64 -verify-machineinstrs < %s \ -; RUN: | FileCheck -check-prefix=RV64IFDBT %s +; RUN: | FileCheck -check-prefixes=RV64,RV64IFDBT %s ;; This tests how good we are at materialising constants using `select`. The aim ;; is that we do so without a branch if possible (at the moment our lowering of @@ -25,133 +25,43 @@ ;; the other is zero. 
define signext i32 @select_const_int_easy(i1 zeroext %a) nounwind { -; RV32I-LABEL: select_const_int_easy: -; RV32I: # %bb.0: -; RV32I-NEXT: ret -; -; RV32IF-LABEL: select_const_int_easy: -; RV32IF: # %bb.0: -; RV32IF-NEXT: ret -; -; RV32IBT-LABEL: select_const_int_easy: -; RV32IBT: # %bb.0: -; RV32IBT-NEXT: ret -; -; RV32IFBT-LABEL: select_const_int_easy: -; RV32IFBT: # %bb.0: -; RV32IFBT-NEXT: ret -; -; RV64I-LABEL: select_const_int_easy: -; RV64I: # %bb.0: -; RV64I-NEXT: ret -; -; RV64IFD-LABEL: select_const_int_easy: -; RV64IFD: # %bb.0: -; RV64IFD-NEXT: ret -; -; RV64IBT-LABEL: select_const_int_easy: -; RV64IBT: # %bb.0: -; RV64IBT-NEXT: ret +; RV32-LABEL: select_const_int_easy: +; RV32: # %bb.0: +; RV32-NEXT: ret ; -; RV64IFDBT-LABEL: select_const_int_easy: -; RV64IFDBT: # %bb.0: -; RV64IFDBT-NEXT: ret +; RV64-LABEL: select_const_int_easy: +; RV64: # %bb.0: +; RV64-NEXT: ret %1 = select i1 %a, i32 1, i32 0 ret i32 %1 } define signext i32 @select_const_int_one_away(i1 zeroext %a) nounwind { -; RV32I-LABEL: select_const_int_one_away: -; RV32I: # %bb.0: -; RV32I-NEXT: li a1, 4 -; RV32I-NEXT: sub a0, a1, a0 -; RV32I-NEXT: ret -; -; RV32IF-LABEL: select_const_int_one_away: -; RV32IF: # %bb.0: -; RV32IF-NEXT: li a1, 4 -; RV32IF-NEXT: sub a0, a1, a0 -; RV32IF-NEXT: ret -; -; RV32IBT-LABEL: select_const_int_one_away: -; RV32IBT: # %bb.0: -; RV32IBT-NEXT: li a1, 4 -; RV32IBT-NEXT: sub a0, a1, a0 -; RV32IBT-NEXT: ret -; -; RV32IFBT-LABEL: select_const_int_one_away: -; RV32IFBT: # %bb.0: -; RV32IFBT-NEXT: li a1, 4 -; RV32IFBT-NEXT: sub a0, a1, a0 -; RV32IFBT-NEXT: ret -; -; RV64I-LABEL: select_const_int_one_away: -; RV64I: # %bb.0: -; RV64I-NEXT: li a1, 4 -; RV64I-NEXT: sub a0, a1, a0 -; RV64I-NEXT: ret -; -; RV64IFD-LABEL: select_const_int_one_away: -; RV64IFD: # %bb.0: -; RV64IFD-NEXT: li a1, 4 -; RV64IFD-NEXT: sub a0, a1, a0 -; RV64IFD-NEXT: ret -; -; RV64IBT-LABEL: select_const_int_one_away: -; RV64IBT: # %bb.0: -; RV64IBT-NEXT: li a1, 4 -; RV64IBT-NEXT: sub a0, a1, 
a0 -; RV64IBT-NEXT: ret -; -; RV64IFDBT-LABEL: select_const_int_one_away: -; RV64IFDBT: # %bb.0: -; RV64IFDBT-NEXT: li a1, 4 -; RV64IFDBT-NEXT: sub a0, a1, a0 -; RV64IFDBT-NEXT: ret +; RV32-LABEL: select_const_int_one_away: +; RV32: # %bb.0: +; RV32-NEXT: li a1, 4 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: select_const_int_one_away: +; RV64: # %bb.0: +; RV64-NEXT: li a1, 4 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: ret %1 = select i1 %a, i32 3, i32 4 ret i32 %1 } define signext i32 @select_const_int_pow2_zero(i1 zeroext %a) nounwind { -; RV32I-LABEL: select_const_int_pow2_zero: -; RV32I: # %bb.0: -; RV32I-NEXT: slli a0, a0, 2 -; RV32I-NEXT: ret -; -; RV32IF-LABEL: select_const_int_pow2_zero: -; RV32IF: # %bb.0: -; RV32IF-NEXT: slli a0, a0, 2 -; RV32IF-NEXT: ret -; -; RV32IBT-LABEL: select_const_int_pow2_zero: -; RV32IBT: # %bb.0: -; RV32IBT-NEXT: slli a0, a0, 2 -; RV32IBT-NEXT: ret -; -; RV32IFBT-LABEL: select_const_int_pow2_zero: -; RV32IFBT: # %bb.0: -; RV32IFBT-NEXT: slli a0, a0, 2 -; RV32IFBT-NEXT: ret -; -; RV64I-LABEL: select_const_int_pow2_zero: -; RV64I: # %bb.0: -; RV64I-NEXT: slli a0, a0, 2 -; RV64I-NEXT: ret -; -; RV64IFD-LABEL: select_const_int_pow2_zero: -; RV64IFD: # %bb.0: -; RV64IFD-NEXT: slli a0, a0, 2 -; RV64IFD-NEXT: ret -; -; RV64IBT-LABEL: select_const_int_pow2_zero: -; RV64IBT: # %bb.0: -; RV64IBT-NEXT: slli a0, a0, 2 -; RV64IBT-NEXT: ret -; -; RV64IFDBT-LABEL: select_const_int_pow2_zero: -; RV64IFDBT: # %bb.0: -; RV64IFDBT-NEXT: slli a0, a0, 2 -; RV64IFDBT-NEXT: ret +; RV32-LABEL: select_const_int_pow2_zero: +; RV32: # %bb.0: +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: ret +; +; RV64-LABEL: select_const_int_pow2_zero: +; RV64: # %bb.0: +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: ret %1 = select i1 %a, i32 4, i32 0 ret i32 %1 } @@ -323,547 +233,259 @@ } define signext i32 @select_eq_zero_negone(i32 signext %a, i32 signext %b) nounwind { -; RV32I-LABEL: select_eq_zero_negone: -; RV32I: # %bb.0: -; RV32I-NEXT: xor a0, a0, a1 
-; RV32I-NEXT: seqz a0, a0 -; RV32I-NEXT: neg a0, a0 -; RV32I-NEXT: ret -; -; RV32IF-LABEL: select_eq_zero_negone: -; RV32IF: # %bb.0: -; RV32IF-NEXT: xor a0, a0, a1 -; RV32IF-NEXT: seqz a0, a0 -; RV32IF-NEXT: neg a0, a0 -; RV32IF-NEXT: ret -; -; RV32IBT-LABEL: select_eq_zero_negone: -; RV32IBT: # %bb.0: -; RV32IBT-NEXT: xor a0, a0, a1 -; RV32IBT-NEXT: seqz a0, a0 -; RV32IBT-NEXT: neg a0, a0 -; RV32IBT-NEXT: ret -; -; RV32IFBT-LABEL: select_eq_zero_negone: -; RV32IFBT: # %bb.0: -; RV32IFBT-NEXT: xor a0, a0, a1 -; RV32IFBT-NEXT: seqz a0, a0 -; RV32IFBT-NEXT: neg a0, a0 -; RV32IFBT-NEXT: ret -; -; RV64I-LABEL: select_eq_zero_negone: -; RV64I: # %bb.0: -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: seqz a0, a0 -; RV64I-NEXT: neg a0, a0 -; RV64I-NEXT: ret -; -; RV64IFD-LABEL: select_eq_zero_negone: -; RV64IFD: # %bb.0: -; RV64IFD-NEXT: xor a0, a0, a1 -; RV64IFD-NEXT: seqz a0, a0 -; RV64IFD-NEXT: neg a0, a0 -; RV64IFD-NEXT: ret -; -; RV64IBT-LABEL: select_eq_zero_negone: -; RV64IBT: # %bb.0: -; RV64IBT-NEXT: xor a0, a0, a1 -; RV64IBT-NEXT: seqz a0, a0 -; RV64IBT-NEXT: neg a0, a0 -; RV64IBT-NEXT: ret -; -; RV64IFDBT-LABEL: select_eq_zero_negone: -; RV64IFDBT: # %bb.0: -; RV64IFDBT-NEXT: xor a0, a0, a1 -; RV64IFDBT-NEXT: seqz a0, a0 -; RV64IFDBT-NEXT: neg a0, a0 -; RV64IFDBT-NEXT: ret +; RV32-LABEL: select_eq_zero_negone: +; RV32: # %bb.0: +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: snez a0, a0 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: ret +; +; RV64-LABEL: select_eq_zero_negone: +; RV64: # %bb.0: +; RV64-NEXT: xor a0, a0, a1 +; RV64-NEXT: snez a0, a0 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: ret %1 = icmp eq i32 %a, %b %2 = select i1 %1, i32 -1, i32 0 ret i32 %2 } define signext i32 @select_ne_zero_negone(i32 signext %a, i32 signext %b) nounwind { -; RV32I-LABEL: select_ne_zero_negone: -; RV32I: # %bb.0: -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: snez a0, a0 -; RV32I-NEXT: neg a0, a0 -; RV32I-NEXT: ret -; -; RV32IF-LABEL: select_ne_zero_negone: -; RV32IF: # %bb.0: -; 
RV32IF-NEXT: xor a0, a0, a1 -; RV32IF-NEXT: snez a0, a0 -; RV32IF-NEXT: neg a0, a0 -; RV32IF-NEXT: ret -; -; RV32IBT-LABEL: select_ne_zero_negone: -; RV32IBT: # %bb.0: -; RV32IBT-NEXT: xor a0, a0, a1 -; RV32IBT-NEXT: snez a0, a0 -; RV32IBT-NEXT: neg a0, a0 -; RV32IBT-NEXT: ret -; -; RV32IFBT-LABEL: select_ne_zero_negone: -; RV32IFBT: # %bb.0: -; RV32IFBT-NEXT: xor a0, a0, a1 -; RV32IFBT-NEXT: snez a0, a0 -; RV32IFBT-NEXT: neg a0, a0 -; RV32IFBT-NEXT: ret -; -; RV64I-LABEL: select_ne_zero_negone: -; RV64I: # %bb.0: -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: snez a0, a0 -; RV64I-NEXT: neg a0, a0 -; RV64I-NEXT: ret -; -; RV64IFD-LABEL: select_ne_zero_negone: -; RV64IFD: # %bb.0: -; RV64IFD-NEXT: xor a0, a0, a1 -; RV64IFD-NEXT: snez a0, a0 -; RV64IFD-NEXT: neg a0, a0 -; RV64IFD-NEXT: ret -; -; RV64IBT-LABEL: select_ne_zero_negone: -; RV64IBT: # %bb.0: -; RV64IBT-NEXT: xor a0, a0, a1 -; RV64IBT-NEXT: snez a0, a0 -; RV64IBT-NEXT: neg a0, a0 -; RV64IBT-NEXT: ret -; -; RV64IFDBT-LABEL: select_ne_zero_negone: -; RV64IFDBT: # %bb.0: -; RV64IFDBT-NEXT: xor a0, a0, a1 -; RV64IFDBT-NEXT: snez a0, a0 -; RV64IFDBT-NEXT: neg a0, a0 -; RV64IFDBT-NEXT: ret +; RV32-LABEL: select_ne_zero_negone: +; RV32: # %bb.0: +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: seqz a0, a0 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: ret +; +; RV64-LABEL: select_ne_zero_negone: +; RV64: # %bb.0: +; RV64-NEXT: xor a0, a0, a1 +; RV64-NEXT: seqz a0, a0 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: ret %1 = icmp ne i32 %a, %b %2 = select i1 %1, i32 -1, i32 0 ret i32 %2 } define signext i32 @select_sgt_zero_negone(i32 signext %a, i32 signext %b) nounwind { -; RV32I-LABEL: select_sgt_zero_negone: -; RV32I: # %bb.0: -; RV32I-NEXT: slt a0, a1, a0 -; RV32I-NEXT: neg a0, a0 -; RV32I-NEXT: ret -; -; RV32IF-LABEL: select_sgt_zero_negone: -; RV32IF: # %bb.0: -; RV32IF-NEXT: slt a0, a1, a0 -; RV32IF-NEXT: neg a0, a0 -; RV32IF-NEXT: ret -; -; RV32IBT-LABEL: select_sgt_zero_negone: -; RV32IBT: # %bb.0: -; RV32IBT-NEXT: slt 
a0, a1, a0 -; RV32IBT-NEXT: neg a0, a0 -; RV32IBT-NEXT: ret -; -; RV32IFBT-LABEL: select_sgt_zero_negone: -; RV32IFBT: # %bb.0: -; RV32IFBT-NEXT: slt a0, a1, a0 -; RV32IFBT-NEXT: neg a0, a0 -; RV32IFBT-NEXT: ret -; -; RV64I-LABEL: select_sgt_zero_negone: -; RV64I: # %bb.0: -; RV64I-NEXT: slt a0, a1, a0 -; RV64I-NEXT: neg a0, a0 -; RV64I-NEXT: ret -; -; RV64IFD-LABEL: select_sgt_zero_negone: -; RV64IFD: # %bb.0: -; RV64IFD-NEXT: slt a0, a1, a0 -; RV64IFD-NEXT: neg a0, a0 -; RV64IFD-NEXT: ret -; -; RV64IBT-LABEL: select_sgt_zero_negone: -; RV64IBT: # %bb.0: -; RV64IBT-NEXT: slt a0, a1, a0 -; RV64IBT-NEXT: neg a0, a0 -; RV64IBT-NEXT: ret -; -; RV64IFDBT-LABEL: select_sgt_zero_negone: -; RV64IFDBT: # %bb.0: -; RV64IFDBT-NEXT: slt a0, a1, a0 -; RV64IFDBT-NEXT: neg a0, a0 -; RV64IFDBT-NEXT: ret +; RV32-LABEL: select_sgt_zero_negone: +; RV32: # %bb.0: +; RV32-NEXT: slt a0, a1, a0 +; RV32-NEXT: neg a0, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: select_sgt_zero_negone: +; RV64: # %bb.0: +; RV64-NEXT: slt a0, a1, a0 +; RV64-NEXT: neg a0, a0 +; RV64-NEXT: ret %1 = icmp sgt i32 %a, %b %2 = select i1 %1, i32 -1, i32 0 ret i32 %2 } define signext i32 @select_slt_zero_negone(i32 signext %a, i32 signext %b) nounwind { -; RV32I-LABEL: select_slt_zero_negone: -; RV32I: # %bb.0: -; RV32I-NEXT: slt a0, a0, a1 -; RV32I-NEXT: neg a0, a0 -; RV32I-NEXT: ret -; -; RV32IF-LABEL: select_slt_zero_negone: -; RV32IF: # %bb.0: -; RV32IF-NEXT: slt a0, a0, a1 -; RV32IF-NEXT: neg a0, a0 -; RV32IF-NEXT: ret -; -; RV32IBT-LABEL: select_slt_zero_negone: -; RV32IBT: # %bb.0: -; RV32IBT-NEXT: slt a0, a0, a1 -; RV32IBT-NEXT: neg a0, a0 -; RV32IBT-NEXT: ret -; -; RV32IFBT-LABEL: select_slt_zero_negone: -; RV32IFBT: # %bb.0: -; RV32IFBT-NEXT: slt a0, a0, a1 -; RV32IFBT-NEXT: neg a0, a0 -; RV32IFBT-NEXT: ret -; -; RV64I-LABEL: select_slt_zero_negone: -; RV64I: # %bb.0: -; RV64I-NEXT: slt a0, a0, a1 -; RV64I-NEXT: neg a0, a0 -; RV64I-NEXT: ret -; -; RV64IFD-LABEL: select_slt_zero_negone: -; RV64IFD: # %bb.0: -; 
RV64IFD-NEXT: slt a0, a0, a1 -; RV64IFD-NEXT: neg a0, a0 -; RV64IFD-NEXT: ret -; -; RV64IBT-LABEL: select_slt_zero_negone: -; RV64IBT: # %bb.0: -; RV64IBT-NEXT: slt a0, a0, a1 -; RV64IBT-NEXT: neg a0, a0 -; RV64IBT-NEXT: ret -; -; RV64IFDBT-LABEL: select_slt_zero_negone: -; RV64IFDBT: # %bb.0: -; RV64IFDBT-NEXT: slt a0, a0, a1 -; RV64IFDBT-NEXT: neg a0, a0 -; RV64IFDBT-NEXT: ret +; RV32-LABEL: select_slt_zero_negone: +; RV32: # %bb.0: +; RV32-NEXT: slt a0, a0, a1 +; RV32-NEXT: neg a0, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: select_slt_zero_negone: +; RV64: # %bb.0: +; RV64-NEXT: slt a0, a0, a1 +; RV64-NEXT: neg a0, a0 +; RV64-NEXT: ret %1 = icmp slt i32 %a, %b %2 = select i1 %1, i32 -1, i32 0 ret i32 %2 } define signext i32 @select_sge_zero_negone(i32 signext %a, i32 signext %b) nounwind { -; RV32I-LABEL: select_sge_zero_negone: -; RV32I: # %bb.0: -; RV32I-NEXT: slt a0, a0, a1 -; RV32I-NEXT: addi a0, a0, -1 -; RV32I-NEXT: ret -; -; RV32IF-LABEL: select_sge_zero_negone: -; RV32IF: # %bb.0: -; RV32IF-NEXT: slt a0, a0, a1 -; RV32IF-NEXT: addi a0, a0, -1 -; RV32IF-NEXT: ret -; -; RV32IBT-LABEL: select_sge_zero_negone: -; RV32IBT: # %bb.0: -; RV32IBT-NEXT: slt a0, a0, a1 -; RV32IBT-NEXT: addi a0, a0, -1 -; RV32IBT-NEXT: ret -; -; RV32IFBT-LABEL: select_sge_zero_negone: -; RV32IFBT: # %bb.0: -; RV32IFBT-NEXT: slt a0, a0, a1 -; RV32IFBT-NEXT: addi a0, a0, -1 -; RV32IFBT-NEXT: ret -; -; RV64I-LABEL: select_sge_zero_negone: -; RV64I: # %bb.0: -; RV64I-NEXT: slt a0, a0, a1 -; RV64I-NEXT: addi a0, a0, -1 -; RV64I-NEXT: ret -; -; RV64IFD-LABEL: select_sge_zero_negone: -; RV64IFD: # %bb.0: -; RV64IFD-NEXT: slt a0, a0, a1 -; RV64IFD-NEXT: addi a0, a0, -1 -; RV64IFD-NEXT: ret -; -; RV64IBT-LABEL: select_sge_zero_negone: -; RV64IBT: # %bb.0: -; RV64IBT-NEXT: slt a0, a0, a1 -; RV64IBT-NEXT: addi a0, a0, -1 -; RV64IBT-NEXT: ret -; -; RV64IFDBT-LABEL: select_sge_zero_negone: -; RV64IFDBT: # %bb.0: -; RV64IFDBT-NEXT: slt a0, a0, a1 -; RV64IFDBT-NEXT: addi a0, a0, -1 -; RV64IFDBT-NEXT: 
ret +; RV32-LABEL: select_sge_zero_negone: +; RV32: # %bb.0: +; RV32-NEXT: slt a0, a0, a1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: ret +; +; RV64-LABEL: select_sge_zero_negone: +; RV64: # %bb.0: +; RV64-NEXT: slt a0, a0, a1 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: ret %1 = icmp sge i32 %a, %b %2 = select i1 %1, i32 -1, i32 0 ret i32 %2 } define signext i32 @select_sle_zero_negone(i32 signext %a, i32 signext %b) nounwind { -; RV32I-LABEL: select_sle_zero_negone: -; RV32I: # %bb.0: -; RV32I-NEXT: slt a0, a1, a0 -; RV32I-NEXT: addi a0, a0, -1 -; RV32I-NEXT: ret -; -; RV32IF-LABEL: select_sle_zero_negone: -; RV32IF: # %bb.0: -; RV32IF-NEXT: slt a0, a1, a0 -; RV32IF-NEXT: addi a0, a0, -1 -; RV32IF-NEXT: ret -; -; RV32IBT-LABEL: select_sle_zero_negone: -; RV32IBT: # %bb.0: -; RV32IBT-NEXT: slt a0, a1, a0 -; RV32IBT-NEXT: addi a0, a0, -1 -; RV32IBT-NEXT: ret -; -; RV32IFBT-LABEL: select_sle_zero_negone: -; RV32IFBT: # %bb.0: -; RV32IFBT-NEXT: slt a0, a1, a0 -; RV32IFBT-NEXT: addi a0, a0, -1 -; RV32IFBT-NEXT: ret -; -; RV64I-LABEL: select_sle_zero_negone: -; RV64I: # %bb.0: -; RV64I-NEXT: slt a0, a1, a0 -; RV64I-NEXT: addi a0, a0, -1 -; RV64I-NEXT: ret -; -; RV64IFD-LABEL: select_sle_zero_negone: -; RV64IFD: # %bb.0: -; RV64IFD-NEXT: slt a0, a1, a0 -; RV64IFD-NEXT: addi a0, a0, -1 -; RV64IFD-NEXT: ret -; -; RV64IBT-LABEL: select_sle_zero_negone: -; RV64IBT: # %bb.0: -; RV64IBT-NEXT: slt a0, a1, a0 -; RV64IBT-NEXT: addi a0, a0, -1 -; RV64IBT-NEXT: ret -; -; RV64IFDBT-LABEL: select_sle_zero_negone: -; RV64IFDBT: # %bb.0: -; RV64IFDBT-NEXT: slt a0, a1, a0 -; RV64IFDBT-NEXT: addi a0, a0, -1 -; RV64IFDBT-NEXT: ret +; RV32-LABEL: select_sle_zero_negone: +; RV32: # %bb.0: +; RV32-NEXT: slt a0, a1, a0 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: ret +; +; RV64-LABEL: select_sle_zero_negone: +; RV64: # %bb.0: +; RV64-NEXT: slt a0, a1, a0 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: ret %1 = icmp sle i32 %a, %b %2 = select i1 %1, i32 -1, i32 0 ret i32 %2 } define signext i32 
@select_ugt_zero_negone(i32 signext %a, i32 signext %b) nounwind { -; RV32I-LABEL: select_ugt_zero_negone: -; RV32I: # %bb.0: -; RV32I-NEXT: sltu a0, a1, a0 -; RV32I-NEXT: neg a0, a0 -; RV32I-NEXT: ret -; -; RV32IF-LABEL: select_ugt_zero_negone: -; RV32IF: # %bb.0: -; RV32IF-NEXT: sltu a0, a1, a0 -; RV32IF-NEXT: neg a0, a0 -; RV32IF-NEXT: ret -; -; RV32IBT-LABEL: select_ugt_zero_negone: -; RV32IBT: # %bb.0: -; RV32IBT-NEXT: sltu a0, a1, a0 -; RV32IBT-NEXT: neg a0, a0 -; RV32IBT-NEXT: ret -; -; RV32IFBT-LABEL: select_ugt_zero_negone: -; RV32IFBT: # %bb.0: -; RV32IFBT-NEXT: sltu a0, a1, a0 -; RV32IFBT-NEXT: neg a0, a0 -; RV32IFBT-NEXT: ret -; -; RV64I-LABEL: select_ugt_zero_negone: -; RV64I: # %bb.0: -; RV64I-NEXT: sltu a0, a1, a0 -; RV64I-NEXT: neg a0, a0 -; RV64I-NEXT: ret -; -; RV64IFD-LABEL: select_ugt_zero_negone: -; RV64IFD: # %bb.0: -; RV64IFD-NEXT: sltu a0, a1, a0 -; RV64IFD-NEXT: neg a0, a0 -; RV64IFD-NEXT: ret -; -; RV64IBT-LABEL: select_ugt_zero_negone: -; RV64IBT: # %bb.0: -; RV64IBT-NEXT: sltu a0, a1, a0 -; RV64IBT-NEXT: neg a0, a0 -; RV64IBT-NEXT: ret -; -; RV64IFDBT-LABEL: select_ugt_zero_negone: -; RV64IFDBT: # %bb.0: -; RV64IFDBT-NEXT: sltu a0, a1, a0 -; RV64IFDBT-NEXT: neg a0, a0 -; RV64IFDBT-NEXT: ret +; RV32-LABEL: select_ugt_zero_negone: +; RV32: # %bb.0: +; RV32-NEXT: sltu a0, a1, a0 +; RV32-NEXT: neg a0, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: select_ugt_zero_negone: +; RV64: # %bb.0: +; RV64-NEXT: sltu a0, a1, a0 +; RV64-NEXT: neg a0, a0 +; RV64-NEXT: ret %1 = icmp ugt i32 %a, %b %2 = select i1 %1, i32 -1, i32 0 ret i32 %2 } define signext i32 @select_ult_zero_negone(i32 signext %a, i32 signext %b) nounwind { -; RV32I-LABEL: select_ult_zero_negone: -; RV32I: # %bb.0: -; RV32I-NEXT: sltu a0, a0, a1 -; RV32I-NEXT: neg a0, a0 -; RV32I-NEXT: ret -; -; RV32IF-LABEL: select_ult_zero_negone: -; RV32IF: # %bb.0: -; RV32IF-NEXT: sltu a0, a0, a1 -; RV32IF-NEXT: neg a0, a0 -; RV32IF-NEXT: ret -; -; RV32IBT-LABEL: select_ult_zero_negone: -; RV32IBT: # 
%bb.0: -; RV32IBT-NEXT: sltu a0, a0, a1 -; RV32IBT-NEXT: neg a0, a0 -; RV32IBT-NEXT: ret -; -; RV32IFBT-LABEL: select_ult_zero_negone: -; RV32IFBT: # %bb.0: -; RV32IFBT-NEXT: sltu a0, a0, a1 -; RV32IFBT-NEXT: neg a0, a0 -; RV32IFBT-NEXT: ret -; -; RV64I-LABEL: select_ult_zero_negone: -; RV64I: # %bb.0: -; RV64I-NEXT: sltu a0, a0, a1 -; RV64I-NEXT: neg a0, a0 -; RV64I-NEXT: ret -; -; RV64IFD-LABEL: select_ult_zero_negone: -; RV64IFD: # %bb.0: -; RV64IFD-NEXT: sltu a0, a0, a1 -; RV64IFD-NEXT: neg a0, a0 -; RV64IFD-NEXT: ret -; -; RV64IBT-LABEL: select_ult_zero_negone: -; RV64IBT: # %bb.0: -; RV64IBT-NEXT: sltu a0, a0, a1 -; RV64IBT-NEXT: neg a0, a0 -; RV64IBT-NEXT: ret -; -; RV64IFDBT-LABEL: select_ult_zero_negone: -; RV64IFDBT: # %bb.0: -; RV64IFDBT-NEXT: sltu a0, a0, a1 -; RV64IFDBT-NEXT: neg a0, a0 -; RV64IFDBT-NEXT: ret +; RV32-LABEL: select_ult_zero_negone: +; RV32: # %bb.0: +; RV32-NEXT: sltu a0, a0, a1 +; RV32-NEXT: neg a0, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: select_ult_zero_negone: +; RV64: # %bb.0: +; RV64-NEXT: sltu a0, a0, a1 +; RV64-NEXT: neg a0, a0 +; RV64-NEXT: ret %1 = icmp ult i32 %a, %b %2 = select i1 %1, i32 -1, i32 0 ret i32 %2 } define signext i32 @select_uge_zero_negone(i32 signext %a, i32 signext %b) nounwind { -; RV32I-LABEL: select_uge_zero_negone: -; RV32I: # %bb.0: -; RV32I-NEXT: sltu a0, a0, a1 -; RV32I-NEXT: addi a0, a0, -1 -; RV32I-NEXT: ret -; -; RV32IF-LABEL: select_uge_zero_negone: -; RV32IF: # %bb.0: -; RV32IF-NEXT: sltu a0, a0, a1 -; RV32IF-NEXT: addi a0, a0, -1 -; RV32IF-NEXT: ret -; -; RV32IBT-LABEL: select_uge_zero_negone: -; RV32IBT: # %bb.0: -; RV32IBT-NEXT: sltu a0, a0, a1 -; RV32IBT-NEXT: addi a0, a0, -1 -; RV32IBT-NEXT: ret -; -; RV32IFBT-LABEL: select_uge_zero_negone: -; RV32IFBT: # %bb.0: -; RV32IFBT-NEXT: sltu a0, a0, a1 -; RV32IFBT-NEXT: addi a0, a0, -1 -; RV32IFBT-NEXT: ret -; -; RV64I-LABEL: select_uge_zero_negone: -; RV64I: # %bb.0: -; RV64I-NEXT: sltu a0, a0, a1 -; RV64I-NEXT: addi a0, a0, -1 -; RV64I-NEXT: ret -; 
-; RV64IFD-LABEL: select_uge_zero_negone: -; RV64IFD: # %bb.0: -; RV64IFD-NEXT: sltu a0, a0, a1 -; RV64IFD-NEXT: addi a0, a0, -1 -; RV64IFD-NEXT: ret -; -; RV64IBT-LABEL: select_uge_zero_negone: -; RV64IBT: # %bb.0: -; RV64IBT-NEXT: sltu a0, a0, a1 -; RV64IBT-NEXT: addi a0, a0, -1 -; RV64IBT-NEXT: ret -; -; RV64IFDBT-LABEL: select_uge_zero_negone: -; RV64IFDBT: # %bb.0: -; RV64IFDBT-NEXT: sltu a0, a0, a1 -; RV64IFDBT-NEXT: addi a0, a0, -1 -; RV64IFDBT-NEXT: ret +; RV32-LABEL: select_uge_zero_negone: +; RV32: # %bb.0: +; RV32-NEXT: sltu a0, a0, a1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: ret +; +; RV64-LABEL: select_uge_zero_negone: +; RV64: # %bb.0: +; RV64-NEXT: sltu a0, a0, a1 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: ret %1 = icmp uge i32 %a, %b %2 = select i1 %1, i32 -1, i32 0 ret i32 %2 } define signext i32 @select_ule_zero_negone(i32 signext %a, i32 signext %b) nounwind { -; RV32I-LABEL: select_ule_zero_negone: -; RV32I: # %bb.0: -; RV32I-NEXT: sltu a0, a1, a0 -; RV32I-NEXT: addi a0, a0, -1 -; RV32I-NEXT: ret -; -; RV32IF-LABEL: select_ule_zero_negone: -; RV32IF: # %bb.0: -; RV32IF-NEXT: sltu a0, a1, a0 -; RV32IF-NEXT: addi a0, a0, -1 -; RV32IF-NEXT: ret -; -; RV32IBT-LABEL: select_ule_zero_negone: -; RV32IBT: # %bb.0: -; RV32IBT-NEXT: sltu a0, a1, a0 -; RV32IBT-NEXT: addi a0, a0, -1 -; RV32IBT-NEXT: ret -; -; RV32IFBT-LABEL: select_ule_zero_negone: -; RV32IFBT: # %bb.0: -; RV32IFBT-NEXT: sltu a0, a1, a0 -; RV32IFBT-NEXT: addi a0, a0, -1 -; RV32IFBT-NEXT: ret -; -; RV64I-LABEL: select_ule_zero_negone: -; RV64I: # %bb.0: -; RV64I-NEXT: sltu a0, a1, a0 -; RV64I-NEXT: addi a0, a0, -1 -; RV64I-NEXT: ret -; -; RV64IFD-LABEL: select_ule_zero_negone: -; RV64IFD: # %bb.0: -; RV64IFD-NEXT: sltu a0, a1, a0 -; RV64IFD-NEXT: addi a0, a0, -1 -; RV64IFD-NEXT: ret -; -; RV64IBT-LABEL: select_ule_zero_negone: -; RV64IBT: # %bb.0: -; RV64IBT-NEXT: sltu a0, a1, a0 -; RV64IBT-NEXT: addi a0, a0, -1 -; RV64IBT-NEXT: ret -; -; RV64IFDBT-LABEL: select_ule_zero_negone: -; 
RV64IFDBT: # %bb.0: -; RV64IFDBT-NEXT: sltu a0, a1, a0 -; RV64IFDBT-NEXT: addi a0, a0, -1 -; RV64IFDBT-NEXT: ret +; RV32-LABEL: select_ule_zero_negone: +; RV32: # %bb.0: +; RV32-NEXT: sltu a0, a1, a0 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: ret +; +; RV64-LABEL: select_ule_zero_negone: +; RV64: # %bb.0: +; RV64-NEXT: sltu a0, a1, a0 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: ret %1 = icmp ule i32 %a, %b %2 = select i1 %1, i32 -1, i32 0 ret i32 %2 } + +define i32 @select_eq_1_2(i32 signext %a, i32 signext %b) { +; RV32-LABEL: select_eq_1_2: +; RV32: # %bb.0: +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: snez a0, a0 +; RV32-NEXT: addi a0, a0, 1 +; RV32-NEXT: ret +; +; RV64-LABEL: select_eq_1_2: +; RV64: # %bb.0: +; RV64-NEXT: xor a0, a0, a1 +; RV64-NEXT: snez a0, a0 +; RV64-NEXT: addi a0, a0, 1 +; RV64-NEXT: ret + %1 = icmp eq i32 %a, %b + %2 = select i1 %1, i32 1, i32 2 + ret i32 %2 +} + +define i32 @select_ne_1_2(i32 signext %a, i32 signext %b) { +; RV32-LABEL: select_ne_1_2: +; RV32: # %bb.0: +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: seqz a0, a0 +; RV32-NEXT: addi a0, a0, 1 +; RV32-NEXT: ret +; +; RV64-LABEL: select_ne_1_2: +; RV64: # %bb.0: +; RV64-NEXT: xor a0, a0, a1 +; RV64-NEXT: seqz a0, a0 +; RV64-NEXT: addi a0, a0, 1 +; RV64-NEXT: ret + %1 = icmp ne i32 %a, %b + %2 = select i1 %1, i32 1, i32 2 + ret i32 %2 +} + +define i32 @select_eq_10000_10001(i32 signext %a, i32 signext %b) { +; RV32-LABEL: select_eq_10000_10001: +; RV32: # %bb.0: +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: seqz a0, a0 +; RV32-NEXT: lui a1, 2 +; RV32-NEXT: addi a1, a1, 1810 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: select_eq_10000_10001: +; RV64: # %bb.0: +; RV64-NEXT: xor a0, a0, a1 +; RV64-NEXT: seqz a0, a0 +; RV64-NEXT: lui a1, 2 +; RV64-NEXT: addiw a1, a1, 1810 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: ret + %1 = icmp eq i32 %a, %b + %2 = select i1 %1, i32 10001, i32 10002 + ret i32 %2 +} + +define i32 @select_ne_10001_10002(i32 signext %a, i32 signext %b) { 
+; RV32-LABEL: select_ne_10001_10002: +; RV32: # %bb.0: +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: snez a0, a0 +; RV32-NEXT: lui a1, 2 +; RV32-NEXT: addi a1, a1, 1810 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: select_ne_10001_10002: +; RV64: # %bb.0: +; RV64-NEXT: xor a0, a0, a1 +; RV64-NEXT: snez a0, a0 +; RV64-NEXT: lui a1, 2 +; RV64-NEXT: addiw a1, a1, 1810 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: ret + %1 = icmp ne i32 %a, %b + %2 = select i1 %1, i32 10001, i32 10002 + ret i32 %2 +} diff --git a/llvm/test/CodeGen/RISCV/setcc-logic.ll b/llvm/test/CodeGen/RISCV/setcc-logic.ll --- a/llvm/test/CodeGen/RISCV/setcc-logic.ll +++ b/llvm/test/CodeGen/RISCV/setcc-logic.ll @@ -118,24 +118,3 @@ %r = and i1 %a, %b ret i1 %r } - -define i32 @bar(i32 %n) { -; RV32I-LABEL: bar: -; RV32I: # %bb.0: # %entry -; RV32I-NEXT: addi a0, a0, -9 -; RV32I-NEXT: snez a0, a0 -; RV32I-NEXT: addi a0, a0, 1 -; RV32I-NEXT: ret -; -; RV64I-LABEL: bar: -; RV64I: # %bb.0: # %entry -; RV64I-NEXT: sext.w a0, a0 -; RV64I-NEXT: addi a0, a0, -9 -; RV64I-NEXT: snez a0, a0 -; RV64I-NEXT: addi a0, a0, 1 -; RV64I-NEXT: ret -entry: - %cmp = icmp eq i32 %n, 9 - %a = select i1 %cmp, i32 1, i32 2 - ret i32 %a -} diff --git a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll --- a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll +++ b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll @@ -475,16 +475,16 @@ ; RV32I-LABEL: sext_of_not_cmp_i32: ; RV32I: # %bb.0: ; RV32I-NEXT: addi a0, a0, -7 -; RV32I-NEXT: snez a0, a0 -; RV32I-NEXT: neg a0, a0 +; RV32I-NEXT: seqz a0, a0 +; RV32I-NEXT: addi a0, a0, -1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: sext_of_not_cmp_i32: ; RV64I: # %bb.0: ; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: addi a0, a0, -7 -; RV64I-NEXT: snez a0, a0 -; RV64I-NEXT: neg a0, a0 +; RV64I-NEXT: seqz a0, a0 +; RV64I-NEXT: addi a0, a0, -1 ; RV64I-NEXT: ret %cmp = icmp eq i32 %x, 7 %xor = xor i1 %cmp, 1 @@ -497,16 +497,16 @@ ; RV32I: # %bb.0: ; RV32I-NEXT: xori a0, 
a0, 7 ; RV32I-NEXT: or a0, a0, a1 -; RV32I-NEXT: snez a0, a0 -; RV32I-NEXT: neg a0, a0 +; RV32I-NEXT: seqz a0, a0 +; RV32I-NEXT: addi a0, a0, -1 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: sext_of_not_cmp_i64: ; RV64I: # %bb.0: ; RV64I-NEXT: addi a0, a0, -7 -; RV64I-NEXT: snez a0, a0 -; RV64I-NEXT: neg a0, a0 +; RV64I-NEXT: seqz a0, a0 +; RV64I-NEXT: addi a0, a0, -1 ; RV64I-NEXT: ret %cmp = icmp eq i64 %x, 7 %xor = xor i1 %cmp, 1 diff --git a/llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll b/llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll @@ -0,0 +1,295 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen2/i64/g' %s | llc -mtriple=riscv32 -mattr=+m | \ +; RUN: FileCheck %s --check-prefix=RV32 +; RUN: sed 's/iXLen2/i128/g' %s | llc -mtriple=riscv64 -mattr=+m | \ +; RUN: FileCheck %s --check-prefix=RV64 + +define iXLen2 @test_udiv_3(iXLen2 %x) nounwind { +; RV32-LABEL: test_udiv_3: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: li a2, 3 +; RV32-NEXT: li a3, 0 +; RV32-NEXT: call __udivdi3@plt +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: test_udiv_3: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: li a2, 3 +; RV64-NEXT: li a3, 0 +; RV64-NEXT: call __udivti3@plt +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %a = udiv iXLen2 %x, 3 + ret iXLen2 %a +} + +define iXLen2 @test_udiv_5(iXLen2 %x) nounwind { +; RV32-LABEL: test_udiv_5: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: li a2, 5 +; RV32-NEXT: li a3, 0 +; RV32-NEXT: call __udivdi3@plt +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; 
RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: test_udiv_5: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: li a2, 5 +; RV64-NEXT: li a3, 0 +; RV64-NEXT: call __udivti3@plt +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %a = udiv iXLen2 %x, 5 + ret iXLen2 %a +} + +define iXLen2 @test_udiv_7(iXLen2 %x) nounwind { +; RV32-LABEL: test_udiv_7: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: li a2, 7 +; RV32-NEXT: li a3, 0 +; RV32-NEXT: call __udivdi3@plt +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: test_udiv_7: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: li a2, 7 +; RV64-NEXT: li a3, 0 +; RV64-NEXT: call __udivti3@plt +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %a = udiv iXLen2 %x, 7 + ret iXLen2 %a +} + +define iXLen2 @test_udiv_9(iXLen2 %x) nounwind { +; RV32-LABEL: test_udiv_9: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: li a2, 9 +; RV32-NEXT: li a3, 0 +; RV32-NEXT: call __udivdi3@plt +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: test_udiv_9: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: li a2, 9 +; RV64-NEXT: li a3, 0 +; RV64-NEXT: call __udivti3@plt +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %a = udiv iXLen2 %x, 9 + ret iXLen2 %a +} + +define iXLen2 @test_udiv_15(iXLen2 %x) nounwind { +; RV32-LABEL: test_udiv_15: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; 
RV32-NEXT: li a2, 15 +; RV32-NEXT: li a3, 0 +; RV32-NEXT: call __udivdi3@plt +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: test_udiv_15: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: li a2, 15 +; RV64-NEXT: li a3, 0 +; RV64-NEXT: call __udivti3@plt +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %a = udiv iXLen2 %x, 15 + ret iXLen2 %a +} + +define iXLen2 @test_udiv_17(iXLen2 %x) nounwind { +; RV32-LABEL: test_udiv_17: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: li a2, 17 +; RV32-NEXT: li a3, 0 +; RV32-NEXT: call __udivdi3@plt +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: test_udiv_17: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: li a2, 17 +; RV64-NEXT: li a3, 0 +; RV64-NEXT: call __udivti3@plt +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %a = udiv iXLen2 %x, 17 + ret iXLen2 %a +} + +define iXLen2 @test_udiv_255(iXLen2 %x) nounwind { +; RV32-LABEL: test_udiv_255: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: li a2, 255 +; RV32-NEXT: li a3, 0 +; RV32-NEXT: call __udivdi3@plt +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: test_udiv_255: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: li a2, 255 +; RV64-NEXT: li a3, 0 +; RV64-NEXT: call __udivti3@plt +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %a = udiv iXLen2 %x, 255 + ret iXLen2 %a +} + +define iXLen2 @test_udiv_257(iXLen2 
%x) nounwind { +; RV32-LABEL: test_udiv_257: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: li a2, 257 +; RV32-NEXT: li a3, 0 +; RV32-NEXT: call __udivdi3@plt +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: test_udiv_257: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: li a2, 257 +; RV64-NEXT: li a3, 0 +; RV64-NEXT: call __udivti3@plt +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %a = udiv iXLen2 %x, 257 + ret iXLen2 %a +} + +define iXLen2 @test_udiv_65535(iXLen2 %x) nounwind { +; RV32-LABEL: test_udiv_65535: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: li a3, 0 +; RV32-NEXT: call __udivdi3@plt +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: test_udiv_65535: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: lui a2, 16 +; RV64-NEXT: addiw a2, a2, -1 +; RV64-NEXT: li a3, 0 +; RV64-NEXT: call __udivti3@plt +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %a = udiv iXLen2 %x, 65535 + ret iXLen2 %a +} + +define iXLen2 @test_udiv_65537(iXLen2 %x) nounwind { +; RV32-LABEL: test_udiv_65537: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, 1 +; RV32-NEXT: li a3, 0 +; RV32-NEXT: call __udivdi3@plt +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: test_udiv_65537: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; 
RV64-NEXT: lui a2, 16 +; RV64-NEXT: addiw a2, a2, 1 +; RV64-NEXT: li a3, 0 +; RV64-NEXT: call __udivti3@plt +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %a = udiv iXLen2 %x, 65537 + ret iXLen2 %a +} + +define iXLen2 @test_udiv_12(iXLen2 %x) nounwind { +; RV32-LABEL: test_udiv_12: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: li a2, 12 +; RV32-NEXT: li a3, 0 +; RV32-NEXT: call __udivdi3@plt +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: test_udiv_12: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: li a2, 12 +; RV64-NEXT: li a3, 0 +; RV64-NEXT: call __udivti3@plt +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %a = udiv iXLen2 %x, 12 + ret iXLen2 %a +} diff --git a/llvm/test/CodeGen/RISCV/split-urem-by-constant.ll b/llvm/test/CodeGen/RISCV/split-urem-by-constant.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/split-urem-by-constant.ll @@ -0,0 +1,296 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen2/i64/g' %s | llc -mtriple=riscv32 -mattr=+m | \ +; RUN: FileCheck %s --check-prefix=RV32 +; RUN: sed 's/iXLen2/i128/g' %s | llc -mtriple=riscv64 -mattr=+m | \ +; RUN: FileCheck %s --check-prefix=RV64 + +define iXLen2 @test_urem_3(iXLen2 %x) nounwind { +; RV32-LABEL: test_urem_3: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: li a2, 3 +; RV32-NEXT: li a3, 0 +; RV32-NEXT: call __umoddi3@plt +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: test_urem_3: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: li a2, 3 
+; RV64-NEXT: li a3, 0 +; RV64-NEXT: call __umodti3@plt +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %a = urem iXLen2 %x, 3 + ret iXLen2 %a +} + +define iXLen2 @test_urem_5(iXLen2 %x) nounwind { +; RV32-LABEL: test_urem_5: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: li a2, 5 +; RV32-NEXT: li a3, 0 +; RV32-NEXT: call __umoddi3@plt +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: test_urem_5: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: li a2, 5 +; RV64-NEXT: li a3, 0 +; RV64-NEXT: call __umodti3@plt +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %a = urem iXLen2 %x, 5 + ret iXLen2 %a +} + +define iXLen2 @test_urem_7(iXLen2 %x) nounwind { +; RV32-LABEL: test_urem_7: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: li a2, 7 +; RV32-NEXT: li a3, 0 +; RV32-NEXT: call __umoddi3@plt +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: test_urem_7: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: li a2, 7 +; RV64-NEXT: li a3, 0 +; RV64-NEXT: call __umodti3@plt +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %a = urem iXLen2 %x, 7 + ret iXLen2 %a +} + +define iXLen2 @test_urem_9(iXLen2 %x) nounwind { +; RV32-LABEL: test_urem_9: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: li a2, 9 +; RV32-NEXT: li a3, 0 +; RV32-NEXT: call __umoddi3@plt +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: test_urem_9: 
+; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: li a2, 9 +; RV64-NEXT: li a3, 0 +; RV64-NEXT: call __umodti3@plt +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %a = urem iXLen2 %x, 9 + ret iXLen2 %a +} + +define iXLen2 @test_urem_15(iXLen2 %x) nounwind { +; RV32-LABEL: test_urem_15: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: li a2, 15 +; RV32-NEXT: li a3, 0 +; RV32-NEXT: call __umoddi3@plt +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: test_urem_15: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: li a2, 15 +; RV64-NEXT: li a3, 0 +; RV64-NEXT: call __umodti3@plt +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %a = urem iXLen2 %x, 15 + ret iXLen2 %a +} + +define iXLen2 @test_urem_17(iXLen2 %x) nounwind { +; RV32-LABEL: test_urem_17: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: li a2, 17 +; RV32-NEXT: li a3, 0 +; RV32-NEXT: call __umoddi3@plt +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: test_urem_17: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: li a2, 17 +; RV64-NEXT: li a3, 0 +; RV64-NEXT: call __umodti3@plt +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %a = urem iXLen2 %x, 17 + ret iXLen2 %a +} + +define iXLen2 @test_urem_255(iXLen2 %x) nounwind { +; RV32-LABEL: test_urem_255: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: li a2, 255 +; RV32-NEXT: li a3, 0 +; RV32-NEXT: call 
__umoddi3@plt +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: test_urem_255: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: li a2, 255 +; RV64-NEXT: li a3, 0 +; RV64-NEXT: call __umodti3@plt +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %a = urem iXLen2 %x, 255 + ret iXLen2 %a +} + +define iXLen2 @test_urem_257(iXLen2 %x) nounwind { +; RV32-LABEL: test_urem_257: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: li a2, 257 +; RV32-NEXT: li a3, 0 +; RV32-NEXT: call __umoddi3@plt +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: test_urem_257: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: li a2, 257 +; RV64-NEXT: li a3, 0 +; RV64-NEXT: call __umodti3@plt +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %a = urem iXLen2 %x, 257 + ret iXLen2 %a +} + +define iXLen2 @test_urem_65535(iXLen2 %x) nounwind { +; RV32-LABEL: test_urem_65535: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: li a3, 0 +; RV32-NEXT: call __umoddi3@plt +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: test_urem_65535: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: lui a2, 16 +; RV64-NEXT: addiw a2, a2, -1 +; RV64-NEXT: li a3, 0 +; RV64-NEXT: call __umodti3@plt +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %a = urem iXLen2 %x, 65535 + ret iXLen2 %a +} + +define iXLen2 
@test_urem_65537(iXLen2 %x) nounwind { +; RV32-LABEL: test_urem_65537: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, 1 +; RV32-NEXT: li a3, 0 +; RV32-NEXT: call __umoddi3@plt +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: test_urem_65537: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: lui a2, 16 +; RV64-NEXT: addiw a2, a2, 1 +; RV64-NEXT: li a3, 0 +; RV64-NEXT: call __umodti3@plt +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %a = urem iXLen2 %x, 65537 + ret iXLen2 %a +} + +define iXLen2 @test_urem_12(iXLen2 %x) nounwind { +; RV32-LABEL: test_urem_12: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: li a2, 12 +; RV32-NEXT: li a3, 0 +; RV32-NEXT: call __umoddi3@plt +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: test_urem_12: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: li a2, 12 +; RV64-NEXT: li a3, 0 +; RV64-NEXT: call __umodti3@plt +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %a = urem iXLen2 %x, 12 + ret iXLen2 %a +} + diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll --- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll @@ -347,27 +347,27 @@ ; RV32-NEXT: call __moddi3@plt ; RV32-NEXT: xori a2, s2, 2 ; RV32-NEXT: or a2, a2, s3 -; RV32-NEXT: snez a2, a2 +; RV32-NEXT: seqz a2, a2 ; RV32-NEXT: xori a3, s5, 1 ; RV32-NEXT: or a3, a3, s6 -; RV32-NEXT: snez a3, a3 +; RV32-NEXT: seqz a3, a3 ; RV32-NEXT: 
or a0, a0, a1 ; RV32-NEXT: snez a0, a0 -; RV32-NEXT: neg a1, a3 -; RV32-NEXT: neg a4, a2 -; RV32-NEXT: neg a5, a0 -; RV32-NEXT: sw a5, 0(s0) -; RV32-NEXT: andi a4, a4, 7 -; RV32-NEXT: sb a4, 12(s0) -; RV32-NEXT: slli a3, a3, 1 -; RV32-NEXT: sub a0, a0, a3 +; RV32-NEXT: addi a1, a3, -1 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: neg a3, a0 +; RV32-NEXT: sw a3, 0(s0) +; RV32-NEXT: andi a3, a2, 7 +; RV32-NEXT: sb a3, 12(s0) +; RV32-NEXT: slli a3, a1, 1 +; RV32-NEXT: or a0, a3, a0 ; RV32-NEXT: sw a0, 4(s0) ; RV32-NEXT: srli a0, a1, 31 ; RV32-NEXT: andi a1, a1, 1 ; RV32-NEXT: slli a1, a1, 1 ; RV32-NEXT: or a0, a0, a1 ; RV32-NEXT: slli a1, a2, 2 -; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: or a0, a0, a1 ; RV32-NEXT: sw a0, 8(s0) ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -422,24 +422,24 @@ ; RV64-NEXT: or a0, a0, a2 ; RV64-NEXT: sltu a0, a1, a0 ; RV64-NEXT: addi a1, s1, -2 -; RV64-NEXT: snez a1, a1 +; RV64-NEXT: seqz a1, a1 ; RV64-NEXT: addi a2, s3, -1 -; RV64-NEXT: snez a2, a2 +; RV64-NEXT: seqz a2, a2 ; RV64-NEXT: neg a0, a0 -; RV64-NEXT: neg a3, a2 -; RV64-NEXT: neg a4, a1 -; RV64-NEXT: slli a4, a4, 29 -; RV64-NEXT: srli a4, a4, 61 -; RV64-NEXT: sb a4, 12(s0) -; RV64-NEXT: slliw a1, a1, 2 -; RV64-NEXT: slli a3, a3, 31 +; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: slli a3, a1, 29 +; RV64-NEXT: srli a3, a3, 61 +; RV64-NEXT: sb a3, 12(s0) +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: slli a3, a2, 31 ; RV64-NEXT: srli a3, a3, 62 -; RV64-NEXT: subw a1, a3, a1 +; RV64-NEXT: or a1, a3, a1 ; RV64-NEXT: sw a1, 8(s0) ; RV64-NEXT: slli a0, a0, 31 ; RV64-NEXT: srli a0, a0, 31 ; RV64-NEXT: slli a1, a2, 33 -; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: or a0, a0, a1 ; RV64-NEXT: sd a0, 0(s0) ; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -498,27 +498,27 @@ ; RV32M-NEXT: call __moddi3@plt ; RV32M-NEXT: xori a2, s2, 2 ; RV32M-NEXT: or a2, a2, s3 -; 
RV32M-NEXT: snez a2, a2 +; RV32M-NEXT: seqz a2, a2 ; RV32M-NEXT: xori a3, s5, 1 ; RV32M-NEXT: or a3, a3, s6 -; RV32M-NEXT: snez a3, a3 +; RV32M-NEXT: seqz a3, a3 ; RV32M-NEXT: or a0, a0, a1 ; RV32M-NEXT: snez a0, a0 -; RV32M-NEXT: neg a1, a3 -; RV32M-NEXT: neg a4, a2 -; RV32M-NEXT: neg a5, a0 -; RV32M-NEXT: sw a5, 0(s0) -; RV32M-NEXT: andi a4, a4, 7 -; RV32M-NEXT: sb a4, 12(s0) -; RV32M-NEXT: slli a3, a3, 1 -; RV32M-NEXT: sub a0, a0, a3 +; RV32M-NEXT: addi a1, a3, -1 +; RV32M-NEXT: addi a2, a2, -1 +; RV32M-NEXT: neg a3, a0 +; RV32M-NEXT: sw a3, 0(s0) +; RV32M-NEXT: andi a3, a2, 7 +; RV32M-NEXT: sb a3, 12(s0) +; RV32M-NEXT: slli a3, a1, 1 +; RV32M-NEXT: or a0, a3, a0 ; RV32M-NEXT: sw a0, 4(s0) ; RV32M-NEXT: srli a0, a1, 31 ; RV32M-NEXT: andi a1, a1, 1 ; RV32M-NEXT: slli a1, a1, 1 ; RV32M-NEXT: or a0, a0, a1 ; RV32M-NEXT: slli a1, a2, 2 -; RV32M-NEXT: sub a0, a0, a1 +; RV32M-NEXT: or a0, a0, a1 ; RV32M-NEXT: sw a0, 8(s0) ; RV32M-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32M-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -566,34 +566,34 @@ ; RV64M-NEXT: add a4, a5, a4 ; RV64M-NEXT: add a1, a1, a4 ; RV64M-NEXT: addi a1, a1, -2 -; RV64M-NEXT: snez a1, a1 +; RV64M-NEXT: seqz a1, a1 ; RV64M-NEXT: lui a4, %hi(.LCPI3_2) ; RV64M-NEXT: ld a4, %lo(.LCPI3_2)(a4) ; RV64M-NEXT: lui a5, %hi(.LCPI3_3) ; RV64M-NEXT: ld a5, %lo(.LCPI3_3)(a5) ; RV64M-NEXT: addi a2, a2, -1 -; RV64M-NEXT: snez a2, a2 +; RV64M-NEXT: seqz a2, a2 ; RV64M-NEXT: mul a3, a3, a4 ; RV64M-NEXT: add a3, a3, a5 ; RV64M-NEXT: slli a4, a3, 63 ; RV64M-NEXT: srli a3, a3, 1 ; RV64M-NEXT: or a3, a3, a4 ; RV64M-NEXT: sltu a3, a5, a3 -; RV64M-NEXT: neg a4, a2 -; RV64M-NEXT: neg a5, a1 +; RV64M-NEXT: addi a2, a2, -1 +; RV64M-NEXT: addi a1, a1, -1 ; RV64M-NEXT: neg a3, a3 -; RV64M-NEXT: slli a2, a2, 33 +; RV64M-NEXT: slli a4, a1, 29 +; RV64M-NEXT: srli a4, a4, 61 +; RV64M-NEXT: sb a4, 12(a0) +; RV64M-NEXT: slli a4, a2, 33 ; RV64M-NEXT: slli a3, a3, 31 ; RV64M-NEXT: srli a3, a3, 31 -; RV64M-NEXT: sub a2, a3, a2 -; 
RV64M-NEXT: sd a2, 0(a0) -; RV64M-NEXT: slli a2, a5, 29 -; RV64M-NEXT: srli a2, a2, 61 -; RV64M-NEXT: sb a2, 12(a0) -; RV64M-NEXT: slliw a1, a1, 2 -; RV64M-NEXT: slli a2, a4, 31 +; RV64M-NEXT: or a3, a3, a4 +; RV64M-NEXT: sd a3, 0(a0) +; RV64M-NEXT: slli a1, a1, 2 +; RV64M-NEXT: slli a2, a2, 31 ; RV64M-NEXT: srli a2, a2, 62 -; RV64M-NEXT: subw a1, a2, a1 +; RV64M-NEXT: or a1, a2, a1 ; RV64M-NEXT: sw a1, 8(a0) ; RV64M-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/trunc-free.ll b/llvm/test/CodeGen/RISCV/trunc-free.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/trunc-free.ll @@ -0,0 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=riscv64 | FileCheck %s + +; Make sure we use lwu for the load, and don't emit +; a sext.w for the compare. This requires isTruncateFree +; to return true for i64->i32. Otherwise we emit a +; lw and a shift pair for the zext. + +define void @foo(i32* %p, i64* %q, i32* %r) { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: lwu a0, 0(a0) +; CHECK-NEXT: sd a0, 0(a1) +; CHECK-NEXT: beqz a0, .LBB0_2 +; CHECK-NEXT: # %bb.1: # %if +; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: .LBB0_2: # %end +; CHECK-NEXT: ret + %a = load i32, i32* %p + %b = zext i32 %a to i64 + store i64 %b, i64* %q + %c = icmp ne i32 %a, 0 + br i1 %c, label %if, label %end + +if: + store i32 %a, i32* %r + br label %end + +end: + ret void +} diff --git a/llvm/test/CodeGen/X86/avx-insertelt.ll b/llvm/test/CodeGen/X86/avx-insertelt.ll --- a/llvm/test/CodeGen/X86/avx-insertelt.ll +++ b/llvm/test/CodeGen/X86/avx-insertelt.ll @@ -422,7 +422,7 @@ define <4 x i64> @insert_i64_two_elts_of_high_subvector(<4 x i64> %x, i64 %s) { ; AVX-LABEL: insert_i64_two_elts_of_high_subvector: ; AVX: # %bb.0: -; AVX-NEXT: vpinsrq $0, %rdi, %xmm0, %xmm1 +; AVX-NEXT: vmovq %rdi, %xmm1 ; AVX-NEXT: vpinsrq $1, %rdi, %xmm1, %xmm1 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX-NEXT: retq @@ -523,7 +523,7 @@ define 
<4 x i64> @insert_i64_two_elts_of_low_subvector(<4 x i64> %x, i64 %s) { ; AVX-LABEL: insert_i64_two_elts_of_low_subvector: ; AVX: # %bb.0: -; AVX-NEXT: vpinsrq $0, %rdi, %xmm0, %xmm1 +; AVX-NEXT: vmovq %rdi, %xmm1 ; AVX-NEXT: vpinsrq $1, %rdi, %xmm1, %xmm1 ; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; AVX-NEXT: retq diff --git a/llvm/test/CodeGen/X86/callbr-asm-kill.mir b/llvm/test/CodeGen/X86/callbr-asm-kill.mir --- a/llvm/test/CodeGen/X86/callbr-asm-kill.mir +++ b/llvm/test/CodeGen/X86/callbr-asm-kill.mir @@ -57,7 +57,7 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY killed $rdi ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64 = COPY killed [[COPY1]] ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1.loop (address-taken, inlineasm-br-indirect-target): + ; CHECK-NEXT: bb.1.loop (ir-block-address-taken %ir-block.loop, inlineasm-br-indirect-target): ; CHECK-NEXT: successors: %bb.2(0x80000000), %bb.1(0x00000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr64 = COPY killed [[COPY2]] @@ -78,7 +78,7 @@ %3:gr64 = COPY killed $rsi %2:gr64 = COPY killed $rdi - bb.1.loop (address-taken, inlineasm-br-indirect-target): + bb.1.loop (ir-block-address-taken %ir-block.loop, inlineasm-br-indirect-target): successors: %bb.2(0x80000000), %bb.1(0x00000000) %0:gr64 = PHI %2, %bb.0, %1, %bb.1 diff --git a/llvm/test/CodeGen/X86/callbr-asm-outputs-pred-succ.ll b/llvm/test/CodeGen/X86/callbr-asm-outputs-pred-succ.ll --- a/llvm/test/CodeGen/X86/callbr-asm-outputs-pred-succ.ll +++ b/llvm/test/CodeGen/X86/callbr-asm-outputs-pred-succ.ll @@ -17,12 +17,12 @@ ; Check the second INLINEASM_BR target block is preceded by the block with the ; second INLINEASM_BR. 
-; CHECK: bb.2 (%ir-block.7, address-taken, inlineasm-br-indirect-target): +; CHECK: bb.2 (%ir-block.7, machine-block-address-taken, ir-block-address-taken %ir-block.7, inlineasm-br-indirect-target): ; CHECK-NEXT: predecessors: %bb.1 ; Check the first INLINEASM_BR target block is predecessed by the block with ; the first INLINEASM_BR. -; CHECK: bb.4 (%ir-block.11, address-taken, inlineasm-br-indirect-target): +; CHECK: bb.4 (%ir-block.11, machine-block-address-taken, ir-block-address-taken %ir-block.11, inlineasm-br-indirect-target): ; CHECK-NEXT: predecessors: %bb.0 @.str = private unnamed_addr constant [26 x i8] c"inline asm#1 returned %d\0A\00", align 1 diff --git a/llvm/test/CodeGen/X86/combine-mul.ll b/llvm/test/CodeGen/X86/combine-mul.ll --- a/llvm/test/CodeGen/X86/combine-mul.ll +++ b/llvm/test/CodeGen/X86/combine-mul.ll @@ -80,13 +80,13 @@ define <4 x i64> @combine_vec_mul_pow2c(<4 x i64> %x) { ; SSE-LABEL: combine_vec_mul_pow2c: ; SSE: # %bb.0: -; SSE-NEXT: movdqa %xmm0, %xmm2 -; SSE-NEXT: psllq $1, %xmm2 -; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7] ; SSE-NEXT: movdqa %xmm1, %xmm2 ; SSE-NEXT: psllq $4, %xmm2 ; SSE-NEXT: psllq $2, %xmm1 ; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7] +; SSE-NEXT: movdqa %xmm0, %xmm2 +; SSE-NEXT: paddq %xmm0, %xmm2 +; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7] ; SSE-NEXT: retq ; ; AVX-LABEL: combine_vec_mul_pow2c: diff --git a/llvm/test/CodeGen/X86/compress-undef-float-passthrough.ll b/llvm/test/CodeGen/X86/compress-undef-float-passthrough.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/compress-undef-float-passthrough.ll @@ -0,0 +1,25 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- -mattr=avx512f,avx512vl | FileCheck %s --check-prefix=CHECK + +define void @test_compress_undef_float_passthrough() { +; CHECK-LABEL: test_compress_undef_float_passthrough: +; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: movb $5, %al +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vcompresspd %ymm0, %ymm0 {%k1} {z} +; CHECK-NEXT: vscatterqpd %ymm0, (,%ymm0) {%k1} +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: ; preds = %loop.50 + %0 = bitcast i4 undef to <4 x i1> + %1 = call <4 x double> @llvm.x86.avx512.mask.compress.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> ) + call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> %1, <4 x double*> undef, i32 0, <4 x i1> %0) + ret void +} + +; Function Attrs: nounwind readnone +declare <4 x double> @llvm.x86.avx512.mask.compress.v4f64(<4 x double>, <4 x double>, <4 x i1>) + +; Function Attrs: nocallback nofree nosync nounwind willreturn writeonly +declare void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double>, <4 x double*>, i32 immarg, <4 x i1>) + diff --git a/llvm/test/CodeGen/X86/ctor-priority-coff.ll b/llvm/test/CodeGen/X86/ctor-priority-coff.ll --- a/llvm/test/CodeGen/X86/ctor-priority-coff.ll +++ b/llvm/test/CodeGen/X86/ctor-priority-coff.ll @@ -6,6 +6,15 @@ ; CHECK: .section .CRT$XCA00042,"dr" ; CHECK: .p2align 3 ; CHECK: .quad f +; CHECK: .section .CRT$XCC,"dr" +; CHECK: .p2align 3 +; CHECK: .quad i +; CHECK: .section .CRT$XCC00250,"dr" +; CHECK: .p2align 3 +; CHECK: .quad k +; CHECK: .section .CRT$XCL,"dr" +; CHECK: .p2align 3 +; CHECK: .quad j ; CHECK: .section .CRT$XCT12345,"dr" ; CHECK: .p2align 3 ; CHECK: .quad g @@ -24,10 +33,13 @@ @str1 = private dso_local unnamed_addr constant [6 x i8] c"first\00", align 1 @str2 = private dso_local unnamed_addr constant [5 x i8] c"main\00", align 1 -@llvm.global_ctors = appending global [3 x { i32, ptr, ptr }] [ +@llvm.global_ctors = appending global [6 x { i32, ptr, ptr }] [ { i32, ptr, ptr } { i32 12345, ptr @g, ptr null }, { i32, ptr, ptr } { i32 42, ptr @f, ptr null }, - { i32, ptr, ptr } { i32 23456, ptr @init_h, ptr @h } + { i32, ptr, ptr } { i32 23456, ptr @init_h, ptr @h }, + { i32, ptr, ptr } { i32 200, ptr @i, ptr null }, + { i32, ptr, ptr } { i32 400, ptr 
@j, ptr null }, + { i32, ptr, ptr } { i32 250, ptr @k, ptr null } ] declare dso_local i32 @puts(ptr nocapture readonly) local_unnamed_addr @@ -50,6 +62,23 @@ ret void } +define dso_local void @i() { +entry: + store i8 43, ptr @h + ret void +} + +define dso_local void @j() { +entry: + store i8 44, ptr @h + ret void +} + +define dso_local void @k() { +entry: + store i8 45, ptr @h + ret void +} ; Function Attrs: nounwind uwtable define dso_local i32 @main() local_unnamed_addr { diff --git a/llvm/test/CodeGen/X86/divide-by-constant.ll b/llvm/test/CodeGen/X86/divide-by-constant.ll --- a/llvm/test/CodeGen/X86/divide-by-constant.ll +++ b/llvm/test/CodeGen/X86/divide-by-constant.ll @@ -456,3 +456,481 @@ %6 = insertvalue { i64, i32 } %5, i32 %4, 1 ret { i64, i32 } %6 } + +define i64 @urem_i64_3(i64 %x) nounwind { +; X32-LABEL: urem_i64_3: +; X32: # %bb.0: # %entry +; X32-NEXT: subl $12, %esp +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $3 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll __umoddi3 +; X32-NEXT: addl $28, %esp +; X32-NEXT: retl +; +; X64-LABEL: urem_i64_3: +; X64: # %bb.0: # %entry +; X64-NEXT: movabsq $-6148914691236517205, %rcx # imm = 0xAAAAAAAAAAAAAAAB +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: mulq %rcx +; X64-NEXT: shrq %rdx +; X64-NEXT: leaq (%rdx,%rdx,2), %rax +; X64-NEXT: subq %rax, %rdi +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: retq +entry: + %rem = urem i64 %x, 3 + ret i64 %rem +} + +define i64 @urem_i64_5(i64 %x) nounwind { +; X32-LABEL: urem_i64_5: +; X32: # %bb.0: # %entry +; X32-NEXT: subl $12, %esp +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $5 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll __umoddi3 +; X32-NEXT: addl $28, %esp +; X32-NEXT: retl +; +; X64-LABEL: urem_i64_5: +; X64: # %bb.0: # %entry +; X64-NEXT: movabsq $-3689348814741910323, %rcx # imm = 0xCCCCCCCCCCCCCCCD +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: mulq %rcx +; X64-NEXT: shrq $2, %rdx +; X64-NEXT: 
leaq (%rdx,%rdx,4), %rax +; X64-NEXT: subq %rax, %rdi +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: retq +entry: + %rem = urem i64 %x, 5 + ret i64 %rem +} + +define i64 @urem_i64_15(i64 %x) nounwind { +; X32-LABEL: urem_i64_15: +; X32: # %bb.0: # %entry +; X32-NEXT: subl $12, %esp +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $15 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll __umoddi3 +; X32-NEXT: addl $28, %esp +; X32-NEXT: retl +; +; X64-LABEL: urem_i64_15: +; X64: # %bb.0: # %entry +; X64-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889 +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: mulq %rcx +; X64-NEXT: shrq $3, %rdx +; X64-NEXT: leaq (%rdx,%rdx,4), %rax +; X64-NEXT: leaq (%rax,%rax,2), %rax +; X64-NEXT: subq %rax, %rdi +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: retq +entry: + %rem = urem i64 %x, 15 + ret i64 %rem +} + +define i64 @urem_i64_17(i64 %x) nounwind { +; X32-LABEL: urem_i64_17: +; X32: # %bb.0: # %entry +; X32-NEXT: subl $12, %esp +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $17 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll __umoddi3 +; X32-NEXT: addl $28, %esp +; X32-NEXT: retl +; +; X64-LABEL: urem_i64_17: +; X64: # %bb.0: # %entry +; X64-NEXT: movabsq $-1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F1 +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rdx, %rax +; X64-NEXT: andq $-16, %rax +; X64-NEXT: shrq $4, %rdx +; X64-NEXT: addq %rax, %rdx +; X64-NEXT: subq %rdx, %rdi +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: retq +entry: + %rem = urem i64 %x, 17 + ret i64 %rem +} + +define i64 @urem_i64_255(i64 %x) nounwind { +; X32-LABEL: urem_i64_255: +; X32: # %bb.0: # %entry +; X32-NEXT: subl $12, %esp +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $255 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll __umoddi3 +; X32-NEXT: addl $28, %esp +; X32-NEXT: retl +; +; X64-LABEL: urem_i64_255: +; X64: # %bb.0: # 
%entry +; X64-NEXT: movabsq $-9187201950435737471, %rcx # imm = 0x8080808080808081 +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: mulq %rcx +; X64-NEXT: shrq $7, %rdx +; X64-NEXT: movq %rdx, %rax +; X64-NEXT: shlq $8, %rax +; X64-NEXT: subq %rax, %rdx +; X64-NEXT: leaq (%rdx,%rdi), %rax +; X64-NEXT: retq +entry: + %rem = urem i64 %x, 255 + ret i64 %rem +} + +define i64 @urem_i64_257(i64 %x) nounwind { +; X32-LABEL: urem_i64_257: +; X32: # %bb.0: # %entry +; X32-NEXT: subl $12, %esp +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $257 # imm = 0x101 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll __umoddi3 +; X32-NEXT: addl $28, %esp +; X32-NEXT: retl +; +; X64-LABEL: urem_i64_257: +; X64: # %bb.0: # %entry +; X64-NEXT: movabsq $-71777214294589695, %rcx # imm = 0xFF00FF00FF00FF01 +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rdx, %rax +; X64-NEXT: andq $-256, %rax +; X64-NEXT: shrq $8, %rdx +; X64-NEXT: addq %rax, %rdx +; X64-NEXT: subq %rdx, %rdi +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: retq +entry: + %rem = urem i64 %x, 257 + ret i64 %rem +} + +define i64 @urem_i64_65535(i64 %x) nounwind { +; X32-LABEL: urem_i64_65535: +; X32: # %bb.0: # %entry +; X32-NEXT: subl $12, %esp +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $65535 # imm = 0xFFFF +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll __umoddi3 +; X32-NEXT: addl $28, %esp +; X32-NEXT: retl +; +; X64-LABEL: urem_i64_65535: +; X64: # %bb.0: # %entry +; X64-NEXT: movabsq $-9223231297218904063, %rcx # imm = 0x8000800080008001 +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: mulq %rcx +; X64-NEXT: shrq $15, %rdx +; X64-NEXT: movq %rdx, %rax +; X64-NEXT: shlq $16, %rax +; X64-NEXT: subq %rax, %rdx +; X64-NEXT: leaq (%rdx,%rdi), %rax +; X64-NEXT: retq +entry: + %rem = urem i64 %x, 65535 + ret i64 %rem +} + +define i64 @urem_i64_65537(i64 %x) nounwind { +; X32-LABEL: urem_i64_65537: +; X32: # %bb.0: # %entry +; X32-NEXT: subl $12, %esp 
+; X32-NEXT: pushl $0 +; X32-NEXT: pushl $65537 # imm = 0x10001 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll __umoddi3 +; X32-NEXT: addl $28, %esp +; X32-NEXT: retl +; +; X64-LABEL: urem_i64_65537: +; X64: # %bb.0: # %entry +; X64-NEXT: movabsq $-281470681808895, %rcx # imm = 0xFFFF0000FFFF0001 +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rdx, %rax +; X64-NEXT: andq $-65536, %rax # imm = 0xFFFF0000 +; X64-NEXT: shrq $16, %rdx +; X64-NEXT: addq %rax, %rdx +; X64-NEXT: subq %rdx, %rdi +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: retq +entry: + %rem = urem i64 %x, 65537 + ret i64 %rem +} + +define i64 @urem_i64_12(i64 %x) nounwind { +; X32-LABEL: urem_i64_12: +; X32: # %bb.0: # %entry +; X32-NEXT: subl $12, %esp +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $12 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll __umoddi3 +; X32-NEXT: addl $28, %esp +; X32-NEXT: retl +; +; X64-LABEL: urem_i64_12: +; X64: # %bb.0: # %entry +; X64-NEXT: movabsq $-6148914691236517205, %rcx # imm = 0xAAAAAAAAAAAAAAAB +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: mulq %rcx +; X64-NEXT: shrq %rdx +; X64-NEXT: andq $-4, %rdx +; X64-NEXT: leaq (%rdx,%rdx,2), %rax +; X64-NEXT: subq %rax, %rdi +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: retq +entry: + %rem = urem i64 %x, 12 + ret i64 %rem +} + +define i64 @udiv_i64_3(i64 %x) nounwind { +; X32-LABEL: udiv_i64_3: +; X32: # %bb.0: # %entry +; X32-NEXT: subl $12, %esp +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $3 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll __udivdi3 +; X32-NEXT: addl $28, %esp +; X32-NEXT: retl +; +; X64-LABEL: udiv_i64_3: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movabsq $-6148914691236517205, %rcx # imm = 0xAAAAAAAAAAAAAAAB +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rdx, %rax +; X64-NEXT: shrq %rax +; X64-NEXT: retq +entry: + %rem = udiv i64 %x, 3 + ret i64 %rem +} 
+ +define i64 @udiv_i64_5(i64 %x) nounwind { +; X32-LABEL: udiv_i64_5: +; X32: # %bb.0: # %entry +; X32-NEXT: subl $12, %esp +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $5 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll __udivdi3 +; X32-NEXT: addl $28, %esp +; X32-NEXT: retl +; +; X64-LABEL: udiv_i64_5: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movabsq $-3689348814741910323, %rcx # imm = 0xCCCCCCCCCCCCCCCD +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rdx, %rax +; X64-NEXT: shrq $2, %rax +; X64-NEXT: retq +entry: + %rem = udiv i64 %x, 5 + ret i64 %rem +} + +define i64 @udiv_i64_15(i64 %x) nounwind { +; X32-LABEL: udiv_i64_15: +; X32: # %bb.0: # %entry +; X32-NEXT: subl $12, %esp +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $15 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll __udivdi3 +; X32-NEXT: addl $28, %esp +; X32-NEXT: retl +; +; X64-LABEL: udiv_i64_15: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889 +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rdx, %rax +; X64-NEXT: shrq $3, %rax +; X64-NEXT: retq +entry: + %rem = udiv i64 %x, 15 + ret i64 %rem +} + +define i64 @udiv_i64_17(i64 %x) nounwind { +; X32-LABEL: udiv_i64_17: +; X32: # %bb.0: # %entry +; X32-NEXT: subl $12, %esp +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $17 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll __udivdi3 +; X32-NEXT: addl $28, %esp +; X32-NEXT: retl +; +; X64-LABEL: udiv_i64_17: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movabsq $-1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F1 +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rdx, %rax +; X64-NEXT: shrq $4, %rax +; X64-NEXT: retq +entry: + %rem = udiv i64 %x, 17 + ret i64 %rem +} + +define i64 @udiv_i64_255(i64 %x) nounwind { +; X32-LABEL: udiv_i64_255: +; X32: # %bb.0: # %entry +; X32-NEXT: subl 
$12, %esp +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $255 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll __udivdi3 +; X32-NEXT: addl $28, %esp +; X32-NEXT: retl +; +; X64-LABEL: udiv_i64_255: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movabsq $-9187201950435737471, %rcx # imm = 0x8080808080808081 +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rdx, %rax +; X64-NEXT: shrq $7, %rax +; X64-NEXT: retq +entry: + %rem = udiv i64 %x, 255 + ret i64 %rem +} + +define i64 @udiv_i64_257(i64 %x) nounwind { +; X32-LABEL: udiv_i64_257: +; X32: # %bb.0: # %entry +; X32-NEXT: subl $12, %esp +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $257 # imm = 0x101 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll __udivdi3 +; X32-NEXT: addl $28, %esp +; X32-NEXT: retl +; +; X64-LABEL: udiv_i64_257: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movabsq $-71777214294589695, %rcx # imm = 0xFF00FF00FF00FF01 +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rdx, %rax +; X64-NEXT: shrq $8, %rax +; X64-NEXT: retq +entry: + %rem = udiv i64 %x, 257 + ret i64 %rem +} + +define i64 @udiv_i64_65535(i64 %x) nounwind { +; X32-LABEL: udiv_i64_65535: +; X32: # %bb.0: # %entry +; X32-NEXT: subl $12, %esp +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $65535 # imm = 0xFFFF +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll __udivdi3 +; X32-NEXT: addl $28, %esp +; X32-NEXT: retl +; +; X64-LABEL: udiv_i64_65535: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movabsq $-9223231297218904063, %rcx # imm = 0x8000800080008001 +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rdx, %rax +; X64-NEXT: shrq $15, %rax +; X64-NEXT: retq +entry: + %rem = udiv i64 %x, 65535 + ret i64 %rem +} + +define i64 @udiv_i64_65537(i64 %x) nounwind { +; X32-LABEL: udiv_i64_65537: +; X32: # %bb.0: # %entry +; X32-NEXT: subl $12, %esp +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $65537 
# imm = 0x10001 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll __udivdi3 +; X32-NEXT: addl $28, %esp +; X32-NEXT: retl +; +; X64-LABEL: udiv_i64_65537: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movabsq $-281470681808895, %rcx # imm = 0xFFFF0000FFFF0001 +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rdx, %rax +; X64-NEXT: shrq $16, %rax +; X64-NEXT: retq +entry: + %rem = udiv i64 %x, 65537 + ret i64 %rem +} + +define i64 @udiv_i64_12(i64 %x) nounwind { +; X32-LABEL: udiv_i64_12: +; X32: # %bb.0: # %entry +; X32-NEXT: subl $12, %esp +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $12 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll __udivdi3 +; X32-NEXT: addl $28, %esp +; X32-NEXT: retl +; +; X64-LABEL: udiv_i64_12: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movabsq $-6148914691236517205, %rcx # imm = 0xAAAAAAAAAAAAAAAB +; X64-NEXT: mulq %rcx +; X64-NEXT: movq %rdx, %rax +; X64-NEXT: shrq $3, %rax +; X64-NEXT: retq +entry: + %rem = udiv i64 %x, 12 + ret i64 %rem +} diff --git a/llvm/test/CodeGen/X86/divmod128.ll b/llvm/test/CodeGen/X86/divmod128.ll --- a/llvm/test/CodeGen/X86/divmod128.ll +++ b/llvm/test/CodeGen/X86/divmod128.ll @@ -123,3 +123,543 @@ %2 = trunc i128 %1 to i64 ret i64 %2 } + +define i128 @urem_i128_3(i128 %x) nounwind { +; X86-64-LABEL: urem_i128_3: +; X86-64: # %bb.0: # %entry +; X86-64-NEXT: pushq %rax +; X86-64-NEXT: movl $3, %edx +; X86-64-NEXT: xorl %ecx, %ecx +; X86-64-NEXT: callq __umodti3@PLT +; X86-64-NEXT: popq %rcx +; X86-64-NEXT: retq +; +; WIN64-LABEL: urem_i128_3: +; WIN64: # %bb.0: # %entry +; WIN64-NEXT: subq $72, %rsp +; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq $3, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp) +; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN64-NEXT: callq __umodti3 +; WIN64-NEXT: 
movq %xmm0, %rax +; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; WIN64-NEXT: movq %xmm0, %rdx +; WIN64-NEXT: addq $72, %rsp +; WIN64-NEXT: retq +entry: + %rem = urem i128 %x, 3 + ret i128 %rem +} + +define i128 @urem_i128_5(i128 %x) nounwind { +; X86-64-LABEL: urem_i128_5: +; X86-64: # %bb.0: # %entry +; X86-64-NEXT: pushq %rax +; X86-64-NEXT: movl $5, %edx +; X86-64-NEXT: xorl %ecx, %ecx +; X86-64-NEXT: callq __umodti3@PLT +; X86-64-NEXT: popq %rcx +; X86-64-NEXT: retq +; +; WIN64-LABEL: urem_i128_5: +; WIN64: # %bb.0: # %entry +; WIN64-NEXT: subq $72, %rsp +; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq $5, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp) +; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN64-NEXT: callq __umodti3 +; WIN64-NEXT: movq %xmm0, %rax +; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; WIN64-NEXT: movq %xmm0, %rdx +; WIN64-NEXT: addq $72, %rsp +; WIN64-NEXT: retq +entry: + %rem = urem i128 %x, 5 + ret i128 %rem +} + +define i128 @urem_i128_15(i128 %x) nounwind { +; X86-64-LABEL: urem_i128_15: +; X86-64: # %bb.0: # %entry +; X86-64-NEXT: pushq %rax +; X86-64-NEXT: movl $15, %edx +; X86-64-NEXT: xorl %ecx, %ecx +; X86-64-NEXT: callq __umodti3@PLT +; X86-64-NEXT: popq %rcx +; X86-64-NEXT: retq +; +; WIN64-LABEL: urem_i128_15: +; WIN64: # %bb.0: # %entry +; WIN64-NEXT: subq $72, %rsp +; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq $15, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp) +; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN64-NEXT: callq __umodti3 +; WIN64-NEXT: movq %xmm0, %rax +; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; WIN64-NEXT: movq %xmm0, %rdx +; WIN64-NEXT: addq $72, %rsp +; WIN64-NEXT: retq +entry: + %rem = urem i128 %x, 15 + ret i128 %rem +} + +define i128 @urem_i128_17(i128 %x) nounwind 
{ +; X86-64-LABEL: urem_i128_17: +; X86-64: # %bb.0: # %entry +; X86-64-NEXT: pushq %rax +; X86-64-NEXT: movl $17, %edx +; X86-64-NEXT: xorl %ecx, %ecx +; X86-64-NEXT: callq __umodti3@PLT +; X86-64-NEXT: popq %rcx +; X86-64-NEXT: retq +; +; WIN64-LABEL: urem_i128_17: +; WIN64: # %bb.0: # %entry +; WIN64-NEXT: subq $72, %rsp +; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq $17, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp) +; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN64-NEXT: callq __umodti3 +; WIN64-NEXT: movq %xmm0, %rax +; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; WIN64-NEXT: movq %xmm0, %rdx +; WIN64-NEXT: addq $72, %rsp +; WIN64-NEXT: retq +entry: + %rem = urem i128 %x, 17 + ret i128 %rem +} + +define i128 @urem_i128_255(i128 %x) nounwind { +; X86-64-LABEL: urem_i128_255: +; X86-64: # %bb.0: # %entry +; X86-64-NEXT: pushq %rax +; X86-64-NEXT: movl $255, %edx +; X86-64-NEXT: xorl %ecx, %ecx +; X86-64-NEXT: callq __umodti3@PLT +; X86-64-NEXT: popq %rcx +; X86-64-NEXT: retq +; +; WIN64-LABEL: urem_i128_255: +; WIN64: # %bb.0: # %entry +; WIN64-NEXT: subq $72, %rsp +; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq $255, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp) +; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN64-NEXT: callq __umodti3 +; WIN64-NEXT: movq %xmm0, %rax +; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; WIN64-NEXT: movq %xmm0, %rdx +; WIN64-NEXT: addq $72, %rsp +; WIN64-NEXT: retq +entry: + %rem = urem i128 %x, 255 + ret i128 %rem +} + +define i128 @urem_i128_257(i128 %x) nounwind { +; X86-64-LABEL: urem_i128_257: +; X86-64: # %bb.0: # %entry +; X86-64-NEXT: pushq %rax +; X86-64-NEXT: movl $257, %edx # imm = 0x101 +; X86-64-NEXT: xorl %ecx, %ecx +; X86-64-NEXT: callq __umodti3@PLT +; X86-64-NEXT: popq %rcx +; 
X86-64-NEXT: retq +; +; WIN64-LABEL: urem_i128_257: +; WIN64: # %bb.0: # %entry +; WIN64-NEXT: subq $72, %rsp +; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq $257, {{[0-9]+}}(%rsp) # imm = 0x101 +; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp) +; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN64-NEXT: callq __umodti3 +; WIN64-NEXT: movq %xmm0, %rax +; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; WIN64-NEXT: movq %xmm0, %rdx +; WIN64-NEXT: addq $72, %rsp +; WIN64-NEXT: retq +entry: + %rem = urem i128 %x, 257 + ret i128 %rem +} + +define i128 @urem_i128_65535(i128 %x) nounwind { +; X86-64-LABEL: urem_i128_65535: +; X86-64: # %bb.0: # %entry +; X86-64-NEXT: pushq %rax +; X86-64-NEXT: movl $65535, %edx # imm = 0xFFFF +; X86-64-NEXT: xorl %ecx, %ecx +; X86-64-NEXT: callq __umodti3@PLT +; X86-64-NEXT: popq %rcx +; X86-64-NEXT: retq +; +; WIN64-LABEL: urem_i128_65535: +; WIN64: # %bb.0: # %entry +; WIN64-NEXT: subq $72, %rsp +; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq $65535, {{[0-9]+}}(%rsp) # imm = 0xFFFF +; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp) +; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN64-NEXT: callq __umodti3 +; WIN64-NEXT: movq %xmm0, %rax +; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; WIN64-NEXT: movq %xmm0, %rdx +; WIN64-NEXT: addq $72, %rsp +; WIN64-NEXT: retq +entry: + %rem = urem i128 %x, 65535 + ret i128 %rem +} + +define i128 @urem_i128_65537(i128 %x) nounwind { +; X86-64-LABEL: urem_i128_65537: +; X86-64: # %bb.0: # %entry +; X86-64-NEXT: pushq %rax +; X86-64-NEXT: movl $65537, %edx # imm = 0x10001 +; X86-64-NEXT: xorl %ecx, %ecx +; X86-64-NEXT: callq __umodti3@PLT +; X86-64-NEXT: popq %rcx +; X86-64-NEXT: retq +; +; WIN64-LABEL: urem_i128_65537: +; WIN64: # %bb.0: # %entry +; WIN64-NEXT: subq $72, %rsp +; WIN64-NEXT: movq %rdx, 
{{[0-9]+}}(%rsp) +; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq $65537, {{[0-9]+}}(%rsp) # imm = 0x10001 +; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp) +; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN64-NEXT: callq __umodti3 +; WIN64-NEXT: movq %xmm0, %rax +; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; WIN64-NEXT: movq %xmm0, %rdx +; WIN64-NEXT: addq $72, %rsp +; WIN64-NEXT: retq +entry: + %rem = urem i128 %x, 65537 + ret i128 %rem +} + +define i128 @urem_i128_12(i128 %x) nounwind { +; X86-64-LABEL: urem_i128_12: +; X86-64: # %bb.0: # %entry +; X86-64-NEXT: pushq %rax +; X86-64-NEXT: movl $12, %edx +; X86-64-NEXT: xorl %ecx, %ecx +; X86-64-NEXT: callq __umodti3@PLT +; X86-64-NEXT: popq %rcx +; X86-64-NEXT: retq +; +; WIN64-LABEL: urem_i128_12: +; WIN64: # %bb.0: # %entry +; WIN64-NEXT: subq $72, %rsp +; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq $12, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp) +; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN64-NEXT: callq __umodti3 +; WIN64-NEXT: movq %xmm0, %rax +; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; WIN64-NEXT: movq %xmm0, %rdx +; WIN64-NEXT: addq $72, %rsp +; WIN64-NEXT: retq +entry: + %rem = urem i128 %x, 12 + ret i128 %rem +} + +define i128 @udiv_i128_3(i128 %x) nounwind { +; X86-64-LABEL: udiv_i128_3: +; X86-64: # %bb.0: # %entry +; X86-64-NEXT: pushq %rax +; X86-64-NEXT: movl $3, %edx +; X86-64-NEXT: xorl %ecx, %ecx +; X86-64-NEXT: callq __udivti3@PLT +; X86-64-NEXT: popq %rcx +; X86-64-NEXT: retq +; +; WIN64-LABEL: udiv_i128_3: +; WIN64: # %bb.0: # %entry +; WIN64-NEXT: subq $72, %rsp +; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq $3, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp) +; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), 
%rdx +; WIN64-NEXT: callq __udivti3 +; WIN64-NEXT: movq %xmm0, %rax +; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; WIN64-NEXT: movq %xmm0, %rdx +; WIN64-NEXT: addq $72, %rsp +; WIN64-NEXT: retq +entry: + %rem = udiv i128 %x, 3 + ret i128 %rem +} + +define i128 @udiv_i128_5(i128 %x) nounwind { +; X86-64-LABEL: udiv_i128_5: +; X86-64: # %bb.0: # %entry +; X86-64-NEXT: pushq %rax +; X86-64-NEXT: movl $5, %edx +; X86-64-NEXT: xorl %ecx, %ecx +; X86-64-NEXT: callq __udivti3@PLT +; X86-64-NEXT: popq %rcx +; X86-64-NEXT: retq +; +; WIN64-LABEL: udiv_i128_5: +; WIN64: # %bb.0: # %entry +; WIN64-NEXT: subq $72, %rsp +; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq $5, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp) +; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN64-NEXT: callq __udivti3 +; WIN64-NEXT: movq %xmm0, %rax +; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; WIN64-NEXT: movq %xmm0, %rdx +; WIN64-NEXT: addq $72, %rsp +; WIN64-NEXT: retq +entry: + %rem = udiv i128 %x, 5 + ret i128 %rem +} + +define i128 @udiv_i128_15(i128 %x) nounwind { +; X86-64-LABEL: udiv_i128_15: +; X86-64: # %bb.0: # %entry +; X86-64-NEXT: pushq %rax +; X86-64-NEXT: movl $15, %edx +; X86-64-NEXT: xorl %ecx, %ecx +; X86-64-NEXT: callq __udivti3@PLT +; X86-64-NEXT: popq %rcx +; X86-64-NEXT: retq +; +; WIN64-LABEL: udiv_i128_15: +; WIN64: # %bb.0: # %entry +; WIN64-NEXT: subq $72, %rsp +; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq $15, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp) +; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN64-NEXT: callq __udivti3 +; WIN64-NEXT: movq %xmm0, %rax +; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; WIN64-NEXT: movq %xmm0, %rdx +; WIN64-NEXT: addq $72, %rsp +; WIN64-NEXT: retq +entry: + %rem = udiv i128 %x, 15 + ret i128 
%rem +} + +define i128 @udiv_i128_17(i128 %x) nounwind { +; X86-64-LABEL: udiv_i128_17: +; X86-64: # %bb.0: # %entry +; X86-64-NEXT: pushq %rax +; X86-64-NEXT: movl $17, %edx +; X86-64-NEXT: xorl %ecx, %ecx +; X86-64-NEXT: callq __udivti3@PLT +; X86-64-NEXT: popq %rcx +; X86-64-NEXT: retq +; +; WIN64-LABEL: udiv_i128_17: +; WIN64: # %bb.0: # %entry +; WIN64-NEXT: subq $72, %rsp +; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq $17, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp) +; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN64-NEXT: callq __udivti3 +; WIN64-NEXT: movq %xmm0, %rax +; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; WIN64-NEXT: movq %xmm0, %rdx +; WIN64-NEXT: addq $72, %rsp +; WIN64-NEXT: retq +entry: + %rem = udiv i128 %x, 17 + ret i128 %rem +} + +define i128 @udiv_i128_255(i128 %x) nounwind { +; X86-64-LABEL: udiv_i128_255: +; X86-64: # %bb.0: # %entry +; X86-64-NEXT: pushq %rax +; X86-64-NEXT: movl $255, %edx +; X86-64-NEXT: xorl %ecx, %ecx +; X86-64-NEXT: callq __udivti3@PLT +; X86-64-NEXT: popq %rcx +; X86-64-NEXT: retq +; +; WIN64-LABEL: udiv_i128_255: +; WIN64: # %bb.0: # %entry +; WIN64-NEXT: subq $72, %rsp +; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq $255, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp) +; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN64-NEXT: callq __udivti3 +; WIN64-NEXT: movq %xmm0, %rax +; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; WIN64-NEXT: movq %xmm0, %rdx +; WIN64-NEXT: addq $72, %rsp +; WIN64-NEXT: retq +entry: + %rem = udiv i128 %x, 255 + ret i128 %rem +} + +define i128 @udiv_i128_257(i128 %x) nounwind { +; X86-64-LABEL: udiv_i128_257: +; X86-64: # %bb.0: # %entry +; X86-64-NEXT: pushq %rax +; X86-64-NEXT: movl $257, %edx # imm = 0x101 +; X86-64-NEXT: xorl %ecx, %ecx +; 
X86-64-NEXT: callq __udivti3@PLT +; X86-64-NEXT: popq %rcx +; X86-64-NEXT: retq +; +; WIN64-LABEL: udiv_i128_257: +; WIN64: # %bb.0: # %entry +; WIN64-NEXT: subq $72, %rsp +; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq $257, {{[0-9]+}}(%rsp) # imm = 0x101 +; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp) +; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN64-NEXT: callq __udivti3 +; WIN64-NEXT: movq %xmm0, %rax +; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; WIN64-NEXT: movq %xmm0, %rdx +; WIN64-NEXT: addq $72, %rsp +; WIN64-NEXT: retq +entry: + %rem = udiv i128 %x, 257 + ret i128 %rem +} + +define i128 @udiv_i128_65535(i128 %x) nounwind { +; X86-64-LABEL: udiv_i128_65535: +; X86-64: # %bb.0: # %entry +; X86-64-NEXT: pushq %rax +; X86-64-NEXT: movl $65535, %edx # imm = 0xFFFF +; X86-64-NEXT: xorl %ecx, %ecx +; X86-64-NEXT: callq __udivti3@PLT +; X86-64-NEXT: popq %rcx +; X86-64-NEXT: retq +; +; WIN64-LABEL: udiv_i128_65535: +; WIN64: # %bb.0: # %entry +; WIN64-NEXT: subq $72, %rsp +; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq $65535, {{[0-9]+}}(%rsp) # imm = 0xFFFF +; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp) +; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN64-NEXT: callq __udivti3 +; WIN64-NEXT: movq %xmm0, %rax +; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; WIN64-NEXT: movq %xmm0, %rdx +; WIN64-NEXT: addq $72, %rsp +; WIN64-NEXT: retq +entry: + %rem = udiv i128 %x, 65535 + ret i128 %rem +} + +define i128 @udiv_i128_65537(i128 %x) nounwind { +; X86-64-LABEL: udiv_i128_65537: +; X86-64: # %bb.0: # %entry +; X86-64-NEXT: pushq %rax +; X86-64-NEXT: movl $65537, %edx # imm = 0x10001 +; X86-64-NEXT: xorl %ecx, %ecx +; X86-64-NEXT: callq __udivti3@PLT +; X86-64-NEXT: popq %rcx +; X86-64-NEXT: retq +; +; WIN64-LABEL: udiv_i128_65537: +; WIN64: # %bb.0: # %entry +; 
WIN64-NEXT: subq $72, %rsp +; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq $65537, {{[0-9]+}}(%rsp) # imm = 0x10001 +; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp) +; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN64-NEXT: callq __udivti3 +; WIN64-NEXT: movq %xmm0, %rax +; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; WIN64-NEXT: movq %xmm0, %rdx +; WIN64-NEXT: addq $72, %rsp +; WIN64-NEXT: retq +entry: + %rem = udiv i128 %x, 65537 + ret i128 %rem +} + +define i128 @udiv_i128_12(i128 %x) nounwind { +; X86-64-LABEL: udiv_i128_12: +; X86-64: # %bb.0: # %entry +; X86-64-NEXT: pushq %rax +; X86-64-NEXT: movl $12, %edx +; X86-64-NEXT: xorl %ecx, %ecx +; X86-64-NEXT: callq __udivti3@PLT +; X86-64-NEXT: popq %rcx +; X86-64-NEXT: retq +; +; WIN64-LABEL: udiv_i128_12: +; WIN64: # %bb.0: # %entry +; WIN64-NEXT: subq $72, %rsp +; WIN64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq $12, {{[0-9]+}}(%rsp) +; WIN64-NEXT: movq $0, {{[0-9]+}}(%rsp) +; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN64-NEXT: callq __udivti3 +; WIN64-NEXT: movq %xmm0, %rax +; WIN64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; WIN64-NEXT: movq %xmm0, %rdx +; WIN64-NEXT: addq $72, %rsp +; WIN64-NEXT: retq +entry: + %rem = udiv i128 %x, 12 + ret i128 %rem +} diff --git a/llvm/test/CodeGen/X86/freeze-binary.ll b/llvm/test/CodeGen/X86/freeze-binary.ll --- a/llvm/test/CodeGen/X86/freeze-binary.ll +++ b/llvm/test/CodeGen/X86/freeze-binary.ll @@ -398,7 +398,7 @@ define <2 x i64> @freeze_shl_vec_outofrange(<2 x i64> %a0) nounwind { ; X86-LABEL: freeze_shl_vec_outofrange: ; X86: # %bb.0: -; X86-NEXT: psllq $1, %xmm0 +; X86-NEXT: paddq %xmm0, %xmm0 ; X86-NEXT: psllq $2, %xmm0 ; X86-NEXT: retl ; diff --git a/llvm/test/CodeGen/X86/gcc_except_table_bb_sections_nolpads.ll 
b/llvm/test/CodeGen/X86/gcc_except_table_bb_sections_nolpads.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/gcc_except_table_bb_sections_nolpads.ll @@ -0,0 +1,44 @@ +;; Verify that @LPStart is omitted when there are no landing pads. This test +;; uses an unkown personality to force emitting the exception table. + +; RUN: llc -basic-block-sections=all -mtriple=x86_64 < %s | FileCheck %s + +declare void @throwit() +declare i32 @__unknown_ehpersonality(...) + +define void @foo(i1 %cond) uwtable personality ptr @__unknown_ehpersonality { +entry: + br i1 %cond, label %cond.true, label %cond.false + +cond.true: ; preds = %entry + call void @throwit() + unreachable + +cond.false: ; preds = %entry + ret void +} + +; CHECK: GCC_except_table0: +; CHECK-NEXT: .Lexception0: +; CHECK-NEXT: .byte 255 # @LPStart Encoding = omit +; CHECK-NEXT: .byte 255 # @TType Encoding = omit +; CHECK-NEXT: .byte 1 # Call site Encoding = uleb128 +; CHECK-NEXT: .uleb128 .Laction_table_base0-.Lcst_begin0 +; CHECK-NEXT: .Lcst_begin0: +; CHECK-NEXT: .Lexception1: +; CHECK-NEXT: .byte 255 # @LPStart Encoding = omit +; CHECK-NEXT: .byte 255 # @TType Encoding = omit +; CHECK-NEXT: .byte 1 # Call site Encoding = uleb128 +; CHECK-NEXT: .uleb128 .Laction_table_base0-.Lcst_begin1 +; CHECK-NEXT: .Lcst_begin1: +; CHECK-NEXT: .Lexception2: +; CHECK-NEXT: .byte 255 # @LPStart Encoding = omit +; CHECK-NEXT: .byte 255 # @TType Encoding = omit +; CHECK-NEXT: .byte 1 # Call site Encoding = uleb128 +; CHECK-NEXT: .uleb128 .Laction_table_base0-.Lcst_begin2 +; CHECK-NEXT: .Lcst_begin2: +; CHECK-NEXT: .uleb128 foo.__part.2-foo.__part.2 # >> Call Site 1 << +; CHECK-NEXT: .uleb128 .LBB_END0_2-foo.__part.2 # Call between foo.__part.2 and .LBB_END0_2 +; CHECK-NEXT: .byte 0 # has no landing pad +; CHECK-NEXT: .byte 0 # On action: cleanup +; CHECK-NEXT: .Laction_table_base0: diff --git a/llvm/test/CodeGen/X86/oddsubvector.ll b/llvm/test/CodeGen/X86/oddsubvector.ll --- 
a/llvm/test/CodeGen/X86/oddsubvector.ll +++ b/llvm/test/CodeGen/X86/oddsubvector.ll @@ -157,71 +157,71 @@ ; SSE2-LABEL: PR42833: ; SSE2: # %bb.0: ; SSE2-NEXT: movl b(%rip), %eax -; SSE2-NEXT: movdqa c+144(%rip), %xmm0 -; SSE2-NEXT: movdqa c+128(%rip), %xmm1 +; SSE2-NEXT: movdqa c+128(%rip), %xmm0 +; SSE2-NEXT: movdqa c+144(%rip), %xmm1 ; SSE2-NEXT: addl c+128(%rip), %eax ; SSE2-NEXT: movd %eax, %xmm2 ; SSE2-NEXT: movd %eax, %xmm3 -; SSE2-NEXT: paddd %xmm1, %xmm3 +; SSE2-NEXT: paddd %xmm0, %xmm3 ; SSE2-NEXT: movdqa d+144(%rip), %xmm4 -; SSE2-NEXT: psubd %xmm0, %xmm4 -; SSE2-NEXT: paddd %xmm0, %xmm0 -; SSE2-NEXT: movdqa %xmm1, %xmm5 -; SSE2-NEXT: paddd %xmm1, %xmm5 +; SSE2-NEXT: psubd %xmm1, %xmm4 +; SSE2-NEXT: paddd %xmm1, %xmm1 +; SSE2-NEXT: movdqa %xmm0, %xmm5 +; SSE2-NEXT: paddd %xmm0, %xmm5 ; SSE2-NEXT: movss {{.*#+}} xmm5 = xmm3[0],xmm5[1,2,3] -; SSE2-NEXT: movdqa %xmm0, c+144(%rip) +; SSE2-NEXT: movdqa %xmm1, c+144(%rip) ; SSE2-NEXT: movaps %xmm5, c+128(%rip) -; SSE2-NEXT: movdqa c+160(%rip), %xmm0 +; SSE2-NEXT: movdqa c+160(%rip), %xmm1 ; SSE2-NEXT: movdqa c+176(%rip), %xmm3 ; SSE2-NEXT: movdqa d+160(%rip), %xmm5 ; SSE2-NEXT: movdqa d+176(%rip), %xmm6 ; SSE2-NEXT: movdqa d+128(%rip), %xmm7 -; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3] -; SSE2-NEXT: psubd %xmm1, %xmm7 +; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3] +; SSE2-NEXT: psubd %xmm0, %xmm7 ; SSE2-NEXT: psubd %xmm3, %xmm6 -; SSE2-NEXT: psubd %xmm0, %xmm5 +; SSE2-NEXT: psubd %xmm1, %xmm5 ; SSE2-NEXT: movdqa %xmm5, d+160(%rip) ; SSE2-NEXT: movdqa %xmm6, d+176(%rip) ; SSE2-NEXT: movdqa %xmm4, d+144(%rip) ; SSE2-NEXT: movdqa %xmm7, d+128(%rip) ; SSE2-NEXT: paddd %xmm3, %xmm3 -; SSE2-NEXT: paddd %xmm0, %xmm0 -; SSE2-NEXT: movdqa %xmm0, c+160(%rip) +; SSE2-NEXT: paddd %xmm1, %xmm1 +; SSE2-NEXT: movdqa %xmm1, c+160(%rip) ; SSE2-NEXT: movdqa %xmm3, c+176(%rip) ; SSE2-NEXT: retq ; ; SSE42-LABEL: PR42833: ; SSE42: # %bb.0: ; SSE42-NEXT: movl b(%rip), %eax -; SSE42-NEXT: movdqa c+144(%rip), %xmm0 
-; SSE42-NEXT: movdqa c+128(%rip), %xmm1 +; SSE42-NEXT: movdqa c+128(%rip), %xmm0 +; SSE42-NEXT: movdqa c+144(%rip), %xmm1 ; SSE42-NEXT: addl c+128(%rip), %eax ; SSE42-NEXT: movd %eax, %xmm2 -; SSE42-NEXT: paddd %xmm1, %xmm2 +; SSE42-NEXT: paddd %xmm0, %xmm2 ; SSE42-NEXT: movdqa d+144(%rip), %xmm3 -; SSE42-NEXT: psubd %xmm0, %xmm3 -; SSE42-NEXT: paddd %xmm0, %xmm0 -; SSE42-NEXT: movdqa %xmm1, %xmm4 -; SSE42-NEXT: paddd %xmm1, %xmm4 +; SSE42-NEXT: psubd %xmm1, %xmm3 +; SSE42-NEXT: paddd %xmm1, %xmm1 +; SSE42-NEXT: movdqa %xmm0, %xmm4 +; SSE42-NEXT: paddd %xmm0, %xmm4 ; SSE42-NEXT: pblendw {{.*#+}} xmm4 = xmm2[0,1],xmm4[2,3,4,5,6,7] -; SSE42-NEXT: movdqa %xmm0, c+144(%rip) +; SSE42-NEXT: movdqa %xmm1, c+144(%rip) ; SSE42-NEXT: movdqa %xmm4, c+128(%rip) -; SSE42-NEXT: movdqa c+160(%rip), %xmm0 +; SSE42-NEXT: movdqa c+160(%rip), %xmm1 ; SSE42-NEXT: movdqa c+176(%rip), %xmm2 ; SSE42-NEXT: movdqa d+160(%rip), %xmm4 ; SSE42-NEXT: movdqa d+176(%rip), %xmm5 ; SSE42-NEXT: movdqa d+128(%rip), %xmm6 -; SSE42-NEXT: pinsrd $0, %eax, %xmm1 -; SSE42-NEXT: psubd %xmm1, %xmm6 +; SSE42-NEXT: pinsrd $0, %eax, %xmm0 +; SSE42-NEXT: psubd %xmm0, %xmm6 ; SSE42-NEXT: psubd %xmm2, %xmm5 -; SSE42-NEXT: psubd %xmm0, %xmm4 +; SSE42-NEXT: psubd %xmm1, %xmm4 ; SSE42-NEXT: movdqa %xmm4, d+160(%rip) ; SSE42-NEXT: movdqa %xmm5, d+176(%rip) ; SSE42-NEXT: movdqa %xmm3, d+144(%rip) ; SSE42-NEXT: movdqa %xmm6, d+128(%rip) ; SSE42-NEXT: paddd %xmm2, %xmm2 -; SSE42-NEXT: paddd %xmm0, %xmm0 -; SSE42-NEXT: movdqa %xmm0, c+160(%rip) +; SSE42-NEXT: paddd %xmm1, %xmm1 +; SSE42-NEXT: movdqa %xmm1, c+160(%rip) ; SSE42-NEXT: movdqa %xmm2, c+176(%rip) ; SSE42-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/peephole-nofold-tpoff-x86.mir b/llvm/test/CodeGen/X86/peephole-nofold-tpoff-x86.mir --- a/llvm/test/CodeGen/X86/peephole-nofold-tpoff-x86.mir +++ b/llvm/test/CodeGen/X86/peephole-nofold-tpoff-x86.mir @@ -33,7 +33,7 @@ } ; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn - 
declare ptr @llvm.threadlocal.address.p0(ptr) #0 + declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #0 attributes #0 = { nocallback nofree nosync nounwind readnone speculatable willreturn } diff --git a/llvm/test/CodeGen/X86/peephole-nofold-tpoff-x86_64.mir b/llvm/test/CodeGen/X86/peephole-nofold-tpoff-x86_64.mir --- a/llvm/test/CodeGen/X86/peephole-nofold-tpoff-x86_64.mir +++ b/llvm/test/CodeGen/X86/peephole-nofold-tpoff-x86_64.mir @@ -23,7 +23,7 @@ } ; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn - declare ptr @llvm.threadlocal.address.p0(ptr) #0 + declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #0 attributes #0 = { nocallback nofree nosync nounwind readnone speculatable willreturn } diff --git a/llvm/test/CodeGen/X86/rotate_vec.ll b/llvm/test/CodeGen/X86/rotate_vec.ll --- a/llvm/test/CodeGen/X86/rotate_vec.ll +++ b/llvm/test/CodeGen/X86/rotate_vec.ll @@ -111,21 +111,18 @@ ; XOPAVX1-LABEL: rot_v4i32_mask_ashr0: ; XOPAVX1: # %bb.0: ; XOPAVX1-NEXT: vpshad {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; XOPAVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; XOPAVX1-NEXT: retq ; ; XOPAVX2-LABEL: rot_v4i32_mask_ashr0: ; XOPAVX2: # %bb.0: ; XOPAVX2-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; XOPAVX2-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; XOPAVX2-NEXT: retq ; ; AVX512-LABEL: rot_v4i32_mask_ashr0: ; AVX512: # %bb.0: ; AVX512-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = ashr <4 x i32> %a0, @@ -139,7 +136,6 @@ ; XOPAVX1-LABEL: rot_v4i32_mask_ashr1: ; XOPAVX1: # %bb.0: ; XOPAVX1-NEXT: vpsrad $25, %xmm0, %xmm0 -; XOPAVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; XOPAVX1-NEXT: vpand 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; XOPAVX1-NEXT: retq @@ -147,7 +143,6 @@ ; XOPAVX2-LABEL: rot_v4i32_mask_ashr1: ; XOPAVX2: # %bb.0: ; XOPAVX2-NEXT: vpsrad $25, %xmm0, %xmm0 -; XOPAVX2-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; XOPAVX2-NEXT: vpbroadcastd %xmm0, %xmm0 ; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; XOPAVX2-NEXT: retq @@ -155,7 +150,6 @@ ; AVX512-LABEL: rot_v4i32_mask_ashr1: ; AVX512: # %bb.0: ; AVX512-NEXT: vpsrad $25, %xmm0, %xmm0 -; AVX512-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; AVX512-NEXT: vpbroadcastd %xmm0, %xmm0 ; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX512-NEXT: retq diff --git a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll --- a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll +++ b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll @@ -581,28 +581,33 @@ ; X64-NEXT: subq $104, %rsp ; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; X64-NEXT: pxor %xmm2, %xmm2 -; X64-NEXT: pcmpgtd %xmm0, %xmm2 +; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,1,3,3] +; X64-NEXT: psllq $32, %xmm3 +; X64-NEXT: movdqa %xmm3, %xmm2 +; X64-NEXT: psrad $31, %xmm2 +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] +; X64-NEXT: psrlq $31, %xmm3 +; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3] ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; X64-NEXT: paddq %xmm0, %xmm0 ; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; X64-NEXT: movq %xmm0, %rbx -; X64-NEXT: movq %rbx, %rbp +; X64-NEXT: movq %xmm0, %r15 +; X64-NEXT: movq %r15, %rbp ; X64-NEXT: sarq $63, %rbp -; X64-NEXT: shldq $31, %rbx, %rbp +; X64-NEXT: shldq $31, %r15, %rbp +; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] ; X64-NEXT: pxor %xmm0, %xmm0 ; X64-NEXT: pcmpgtd %xmm1, %xmm0 ; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; X64-NEXT: movq 
%xmm1, %rdx ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rdx, %r15 -; X64-NEXT: sarq $63, %r15 -; X64-NEXT: movq %rbx, %r12 +; X64-NEXT: movq %rdx, %rbx +; X64-NEXT: sarq $63, %rbx +; X64-NEXT: movq %r15, %r12 ; X64-NEXT: shlq $31, %r12 ; X64-NEXT: movq %r12, %rdi ; X64-NEXT: movq %rbp, %rsi -; X64-NEXT: movq %r15, %rcx +; X64-NEXT: movq %rbx, %rcx ; X64-NEXT: callq __divti3@PLT ; X64-NEXT: movq %rax, %r13 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill @@ -610,16 +615,16 @@ ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: subq $1, %r13 ; X64-NEXT: sbbq $0, %r14 -; X64-NEXT: shrq $63, %rbx -; X64-NEXT: xorl %r15d, %ebx +; X64-NEXT: shrq $63, %r15 +; X64-NEXT: xorl %ebx, %r15d ; X64-NEXT: movq %r12, %rdi ; X64-NEXT: movq %rbp, %rsi ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; X64-NEXT: movq %r15, %rcx +; X64-NEXT: movq %rbx, %rcx ; X64-NEXT: callq __modti3@PLT ; X64-NEXT: orq %rax, %rdx ; X64-NEXT: setne %al -; X64-NEXT: testb %bl, %al +; X64-NEXT: testb %r15b, %al ; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload ; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload ; X64-NEXT: movl $4294967295, %edx # imm = 0xFFFFFFFF @@ -699,43 +704,45 @@ ; X64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; X64-NEXT: psrlq $1, %xmm1 ; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload -; X64-NEXT: # xmm1 = mem[2,3,2,3] -; X64-NEXT: pxor %xmm0, %xmm0 -; X64-NEXT: pcmpgtd %xmm1, %xmm0 -; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; X64-NEXT: paddq %xmm1, %xmm1 -; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; X64-NEXT: movq %xmm1, %rbx -; X64-NEXT: movq %rbx, %r12 -; X64-NEXT: sarq $63, %r12 -; X64-NEXT: shldq $31, %rbx, %r12 -; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte 
Folded Reload -; X64-NEXT: # xmm1 = mem[2,3,2,3] -; X64-NEXT: pxor %xmm0, %xmm0 -; X64-NEXT: pcmpgtd %xmm1, %xmm0 -; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; X64-NEXT: movq %xmm1, %rdx +; X64-NEXT: pshufd $212, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; X64-NEXT: # xmm0 = mem[0,1,1,3] +; X64-NEXT: psllq $32, %xmm0 +; X64-NEXT: movdqa %xmm0, %xmm1 +; X64-NEXT: psrad $31, %xmm1 +; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] +; X64-NEXT: psrlq $31, %xmm0 +; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-NEXT: movq %xmm0, %rbx +; X64-NEXT: movq %rbx, %r13 +; X64-NEXT: sarq $63, %r13 +; X64-NEXT: shldq $31, %rbx, %r13 +; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: pxor %xmm1, %xmm1 +; X64-NEXT: pcmpgtd %xmm0, %xmm1 +; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-NEXT: movq %xmm0, %rdx ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: sarq $63, %rbp ; X64-NEXT: movq %rbx, %r15 ; X64-NEXT: shlq $31, %r15 ; X64-NEXT: movq %r15, %rdi -; X64-NEXT: movq %r12, %rsi +; X64-NEXT: movq %r13, %rsi ; X64-NEXT: movq %rbp, %rcx ; X64-NEXT: callq __divti3@PLT -; X64-NEXT: movq %rax, %r13 +; X64-NEXT: movq %rax, %r12 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, %r14 ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: subq $1, %r13 +; X64-NEXT: subq $1, %r12 ; X64-NEXT: sbbq $0, %r14 ; X64-NEXT: shrq $63, %rbx ; X64-NEXT: xorl %ebp, %ebx ; X64-NEXT: movq %r15, %rdi -; X64-NEXT: movq %r12, %rsi +; X64-NEXT: movq %r13, %rsi ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte 
Reload ; X64-NEXT: movq %rbp, %rcx ; X64-NEXT: callq __modti3@PLT @@ -743,25 +750,25 @@ ; X64-NEXT: setne %al ; X64-NEXT: testb %bl, %al ; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload -; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload +; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload ; X64-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF -; X64-NEXT: cmpq %rcx, %r13 +; X64-NEXT: cmpq %rcx, %r12 ; X64-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF -; X64-NEXT: cmovbq %r13, %rax +; X64-NEXT: cmovbq %r12, %rax ; X64-NEXT: testq %r14, %r14 -; X64-NEXT: cmovnsq %rcx, %r13 -; X64-NEXT: cmoveq %rax, %r13 +; X64-NEXT: cmovnsq %rcx, %r12 +; X64-NEXT: cmoveq %rax, %r12 ; X64-NEXT: movl $0, %eax ; X64-NEXT: cmovnsq %rax, %r14 ; X64-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000 -; X64-NEXT: cmpq %rcx, %r13 +; X64-NEXT: cmpq %rcx, %r12 ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: cmovaq %r13, %rax +; X64-NEXT: cmovaq %r12, %rax ; X64-NEXT: testq %r14, %r14 -; X64-NEXT: cmovsq %rcx, %r13 +; X64-NEXT: cmovsq %rcx, %r12 ; X64-NEXT: cmpq $-1, %r14 -; X64-NEXT: cmoveq %rax, %r13 -; X64-NEXT: movq %r13, %xmm0 +; X64-NEXT: cmoveq %rax, %r12 +; X64-NEXT: movq %r12, %xmm0 ; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload ; X64-NEXT: # xmm0 = mem[2,3,2,3] @@ -816,12 +823,12 @@ ; X64-NEXT: cmovsq %rcx, %r12 ; X64-NEXT: cmpq $-1, %r14 ; X64-NEXT: cmoveq %rax, %r12 -; X64-NEXT: movq %r12, %xmm0 -; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload -; X64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] -; X64-NEXT: psrlq $1, %xmm1 -; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; X64-NEXT: movq %r12, %xmm1 +; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = 
xmm0[0],xmm1[0] +; X64-NEXT: psrlq $1, %xmm0 +; X64-NEXT: shufps $136, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; X64-NEXT: # xmm0 = xmm0[0,2],mem[0,2] ; X64-NEXT: addq $104, %rsp ; X64-NEXT: popq %rbx ; X64-NEXT: popq %r12 @@ -840,116 +847,108 @@ ; X86-NEXT: pushl %esi ; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $256, %esp # imm = 0x100 -; X86-NEXT: movl 24(%ebp), %edx -; X86-NEXT: movl 40(%ebp), %edi -; X86-NEXT: leal {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl %edi, %esi -; X86-NEXT: sarl $31, %esi -; X86-NEXT: movl %edx, %eax -; X86-NEXT: sarl $31, %eax -; X86-NEXT: addl %edx, %edx -; X86-NEXT: adcl %eax, %eax +; X86-NEXT: movl 16(%ebp), %edi +; X86-NEXT: movl 32(%ebp), %eax +; X86-NEXT: movl %eax, %esi ; X86-NEXT: movl %eax, %ecx -; X86-NEXT: shldl $31, %edx, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: shll $31, %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: andl $1, %eax -; X86-NEXT: negl %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sarl $31, %esi ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edi, %ebx +; X86-NEXT: sarl $31, %ebx +; X86-NEXT: leal (%edi,%edi), %eax +; X86-NEXT: shrl $31, %edi +; X86-NEXT: shldl $31, %eax, %edi +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax ; X86-NEXT: pushl %esi ; X86-NEXT: pushl %esi ; X86-NEXT: pushl %esi +; X86-NEXT: pushl %ecx +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi +; X86-NEXT: pushl $0 ; X86-NEXT: pushl %eax -; X86-NEXT: pushl %eax -; X86-NEXT: pushl %ecx -; X86-NEXT: pushl %edx +; X86-NEXT: calll __divti3 +; X86-NEXT: addl $32, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl %esi +; X86-NEXT: pushl %esi +; X86-NEXT: pushl %esi +; X86-NEXT: pushl 32(%ebp) +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl $0 +; X86-NEXT: 
pushl %eax ; X86-NEXT: calll __modti3 ; X86-NEXT: addl $32, %esp -; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl 36(%ebp), %esi -; X86-NEXT: movl %esi, %edi -; X86-NEXT: sarl $31, %edi -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: sarl $31, %edx -; X86-NEXT: addl %ecx, %ecx -; X86-NEXT: adcl %edx, %edx +; X86-NEXT: movl 36(%ebp), %edx ; X86-NEXT: movl %edx, %ebx -; X86-NEXT: shldl $31, %ecx, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: shll $31, %ecx +; X86-NEXT: sarl $31, %ebx +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, %esi +; X86-NEXT: sarl $31, %esi +; X86-NEXT: leal (%ecx,%ecx), %eax +; X86-NEXT: shrl $31, %ecx +; X86-NEXT: shldl $31, %eax, %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: andl $1, %edx -; X86-NEXT: negl %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %esi -; X86-NEXT: pushl %edx -; X86-NEXT: pushl %edx +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edx +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: pushl %esi +; X86-NEXT: pushl %esi ; X86-NEXT: pushl %ecx +; X86-NEXT: pushl $0 ; X86-NEXT: pushl %eax ; X86-NEXT: calll __modti3 ; X86-NEXT: addl $32, %esp -; X86-NEXT: leal {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl 28(%ebp), %ebx -; X86-NEXT: movl %ebx, %edx -; X86-NEXT: sarl $31, %edx -; X86-NEXT: movl 12(%ebp), %eax -; X86-NEXT: movl %eax, %esi +; X86-NEXT: movl 28(%ebp), %edx +; X86-NEXT: movl %edx, %edi +; X86-NEXT: sarl $31, %edi +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, %esi ; X86-NEXT: sarl $31, %esi -; X86-NEXT: addl %eax, %eax -; X86-NEXT: adcl %esi, %esi -; X86-NEXT: movl %esi, %ecx +; X86-NEXT: leal (%ecx,%ecx), %eax 
+; X86-NEXT: shrl $31, %ecx ; X86-NEXT: shldl $31, %eax, %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: shll $31, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: andl $1, %esi -; X86-NEXT: negl %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: pushl %edx -; X86-NEXT: pushl %edx +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %edi ; X86-NEXT: pushl %edx -; X86-NEXT: pushl %ebx +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: pushl %esi ; X86-NEXT: pushl %esi ; X86-NEXT: pushl %ecx +; X86-NEXT: pushl $0 ; X86-NEXT: pushl %eax -; X86-NEXT: pushl %edi ; X86-NEXT: calll __divti3 ; X86-NEXT: addl $32, %esp -; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl 32(%ebp), %edx +; X86-NEXT: movl 40(%ebp), %edx ; X86-NEXT: movl %edx, %esi ; X86-NEXT: sarl $31, %esi -; X86-NEXT: movl 16(%ebp), %ecx -; X86-NEXT: movl %ecx, %ebx -; X86-NEXT: sarl $31, %ebx -; X86-NEXT: addl %ecx, %ecx -; X86-NEXT: adcl %ebx, %ebx -; X86-NEXT: movl %ebx, %edi -; X86-NEXT: shldl $31, %ecx, %edi -; X86-NEXT: shll $31, %ecx +; X86-NEXT: movl 24(%ebp), %ecx +; X86-NEXT: movl %ecx, %edi +; X86-NEXT: sarl $31, %edi +; X86-NEXT: leal (%ecx,%ecx), %eax +; X86-NEXT: shrl $31, %ecx +; X86-NEXT: shldl $31, %eax, %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: andl $1, %ebx -; X86-NEXT: negl %ebx +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax ; X86-NEXT: pushl %esi ; X86-NEXT: pushl %esi ; X86-NEXT: pushl %esi ; X86-NEXT: pushl %edx -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %ecx +; X86-NEXT: pushl $0 ; X86-NEXT: pushl %eax ; X86-NEXT: calll __modti3 ; X86-NEXT: addl $32, %esp @@ -958,39 +957,25 @@ 
; X86-NEXT: pushl %esi ; X86-NEXT: pushl %esi ; X86-NEXT: pushl %esi -; X86-NEXT: pushl 32(%ebp) -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %edi -; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-NEXT: pushl %eax -; X86-NEXT: calll __divti3 -; X86-NEXT: addl $32, %esp -; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: pushl %ecx -; X86-NEXT: pushl %ecx -; X86-NEXT: pushl %ecx ; X86-NEXT: pushl 40(%ebp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: pushl %ecx -; X86-NEXT: pushl %ecx -; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %edi ; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT: pushl $0 ; X86-NEXT: pushl %eax ; X86-NEXT: calll __divti3 ; X86-NEXT: addl $32, %esp ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: pushl %ecx -; X86-NEXT: pushl %ecx -; X86-NEXT: pushl %ecx +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %ebx ; X86-NEXT: pushl 36(%ebp) ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %edi ; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT: pushl $0 ; X86-NEXT: pushl %eax ; X86-NEXT: calll __divti3 ; X86-NEXT: addl $32, %esp @@ -1005,22 +990,22 @@ ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: sbbl $0, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %eax ; X86-NEXT: sbbl $0, %eax ; X86-NEXT: movl %eax, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: testl %ebx, %ebx +; X86-NEXT: sets %bl ; X86-NEXT: testl %edi, %edi -; X86-NEXT: sets %al -; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-NEXT: sets %ah -; X86-NEXT: xorb %al, %ah +; X86-NEXT: sets %bh +; X86-NEXT: xorb %bl, %bh ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: orl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: orl %edi, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-NEXT: orl %edi, %eax ; X86-NEXT: setne %al -; X86-NEXT: testb %ah, %al +; X86-NEXT: testb %bh, %al ; X86-NEXT: cmovel %esi, %edx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload @@ -1030,7 +1015,7 @@ ; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: cmovel %ecx, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -1047,7 +1032,7 @@ ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: sbbl $0, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: testl %ebx, %ebx +; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X86-NEXT: sets %bl ; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X86-NEXT: sets %bh @@ -1085,11 +1070,11 @@ ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: sbbl $0, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: testl %ecx, %ecx -; 
X86-NEXT: sets %al ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NEXT: testl %edx, %edx +; X86-NEXT: sets %al +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: testl %ecx, %ecx ; X86-NEXT: sets %bl ; X86-NEXT: xorb %al, %bl ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax @@ -1100,7 +1085,7 @@ ; X86-NEXT: pushl %ecx ; X86-NEXT: pushl %ecx ; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT: pushl $0 ; X86-NEXT: pushl %eax ; X86-NEXT: calll __modti3 ; X86-NEXT: addl $32, %esp diff --git a/llvm/test/CodeGen/X86/tail-dup-asm-goto.ll b/llvm/test/CodeGen/X86/tail-dup-asm-goto.ll --- a/llvm/test/CodeGen/X86/tail-dup-asm-goto.ll +++ b/llvm/test/CodeGen/X86/tail-dup-asm-goto.ll @@ -38,7 +38,7 @@ ; CHECK-NEXT: INLINEASM_BR &"#$0 $1 $2", 9 /* sideeffect mayload attdialect */, 13 /* imm */, 42, 13 /* imm */, 0, 13 /* imm */, blockaddress(@test1, %ir-block.bb17.i.i.i), 12 /* clobber */, implicit-def early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def early-clobber $eflags ; CHECK-NEXT: JMP_1 %bb.5 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.4.bb17.i.i.i (address-taken, inlineasm-br-indirect-target): + ; CHECK-NEXT: bb.4.bb17.i.i.i (machine-block-address-taken, ir-block-address-taken %ir-block.bb17.i.i.i, inlineasm-br-indirect-target): ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/X86/threadlocal_address.ll b/llvm/test/CodeGen/X86/threadlocal_address.ll --- a/llvm/test/CodeGen/X86/threadlocal_address.ll +++ b/llvm/test/CodeGen/X86/threadlocal_address.ll @@ -37,5 +37,5 @@ ret i32 %3 } -declare ptr @llvm.threadlocal.address(ptr) nounwind readnone willreturn -declare ptr addrspace(1) @llvm.threadlocal.address.p1(ptr addrspace(1)) nounwind readnone willreturn +declare nonnull ptr @llvm.threadlocal.address(ptr nonnull) nounwind 
readnone willreturn +declare nonnull ptr addrspace(1) @llvm.threadlocal.address.p1(ptr addrspace(1) nonnull) nounwind readnone willreturn diff --git a/llvm/test/CodeGen/X86/udiv_fix_sat.ll b/llvm/test/CodeGen/X86/udiv_fix_sat.ll --- a/llvm/test/CodeGen/X86/udiv_fix_sat.ll +++ b/llvm/test/CodeGen/X86/udiv_fix_sat.ll @@ -382,93 +382,85 @@ ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: subl $12, %esp +; X86-NEXT: subl $8, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: xorl %ecx, %ecx -; X86-NEXT: addl %eax, %eax -; X86-NEXT: setb %cl -; X86-NEXT: shldl $31, %eax, %ecx -; X86-NEXT: shll $31, %eax +; X86-NEXT: leal (%eax,%eax), %ecx +; X86-NEXT: shrl $31, %eax +; X86-NEXT: shldl $31, %ecx, %eax ; X86-NEXT: pushl $0 ; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %ecx ; X86-NEXT: pushl %eax +; X86-NEXT: pushl $0 ; X86-NEXT: calll __udivdi3 ; X86-NEXT: addl $16, %esp ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: addl %ebp, %ebp -; X86-NEXT: setb %al -; X86-NEXT: shldl $31, %ebp, %eax -; X86-NEXT: shll $31, %ebp +; X86-NEXT: movl %edx, %edi +; X86-NEXT: leal (%ebx,%ebx), %eax +; X86-NEXT: shrl $31, %ebx +; X86-NEXT: shldl $31, %eax, %ebx ; X86-NEXT: pushl $0 -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %eax ; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl $0 ; X86-NEXT: calll __udivdi3 ; X86-NEXT: addl $16, %esp ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NEXT: movl %edx, %ebp -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: addl %edi, %edi -; X86-NEXT: setb %al -; X86-NEXT: shldl $31, %edi, %eax -; X86-NEXT: shll $31, %edi +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: leal (%esi,%esi), 
%eax +; X86-NEXT: shrl $31, %esi +; X86-NEXT: shldl $31, %eax, %esi ; X86-NEXT: pushl $0 ; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax -; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: pushl $0 ; X86-NEXT: calll __udivdi3 ; X86-NEXT: addl $16, %esp -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: movl %edx, %edi -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: addl %esi, %esi -; X86-NEXT: setb %al -; X86-NEXT: shldl $31, %esi, %eax -; X86-NEXT: shll $31, %esi +; X86-NEXT: movl %edx, %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: leal (%edx,%edx), %ecx +; X86-NEXT: shrl $31, %edx +; X86-NEXT: shldl $31, %ecx, %edx +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: cmpl $2, %esi +; X86-NEXT: movl $-1, %edx +; X86-NEXT: cmovael %edx, %eax +; X86-NEXT: cmpl $1, %esi +; X86-NEXT: movl $1, %ebp +; X86-NEXT: cmovael %ebp, %esi +; X86-NEXT: shldl $31, %eax, %esi +; X86-NEXT: cmpl $2, %ebx +; X86-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-NEXT: cmovael %edx, %eax +; X86-NEXT: cmpl $1, %ebx +; X86-NEXT: cmovael %ebp, %ebx +; X86-NEXT: shldl $31, %eax, %ebx +; X86-NEXT: cmpl $2, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: cmovael %edx, %eax +; X86-NEXT: cmpl $1, %edi +; X86-NEXT: cmovael %ebp, %edi +; X86-NEXT: shldl $31, %eax, %edi ; X86-NEXT: pushl $0 ; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax -; X86-NEXT: pushl %esi +; X86-NEXT: pushl %ecx +; X86-NEXT: pushl $0 ; X86-NEXT: calll __udivdi3 ; X86-NEXT: addl $16, %esp ; X86-NEXT: cmpl $2, %edx -; X86-NEXT: movl $-1, %esi -; X86-NEXT: cmovael %esi, %eax +; X86-NEXT: movl $-1, %ecx +; X86-NEXT: cmovael %ecx, %eax ; X86-NEXT: cmpl $1, %edx -; X86-NEXT: movl $1, %ecx -; X86-NEXT: cmovael %ecx, %edx -; X86-NEXT: shldl $31, %eax, %edx -; X86-NEXT: cmpl $2, %edi -; X86-NEXT: cmovael %esi, %ebx -; X86-NEXT: cmpl $1, %edi -; X86-NEXT: cmovael %ecx, %edi -; X86-NEXT: shldl $31, %ebx, %edi -; X86-NEXT: cmpl $2, %ebp -; X86-NEXT: movl (%esp), %eax # 4-byte Reload -; 
X86-NEXT: cmovael %esi, %eax -; X86-NEXT: cmpl $1, %ebp -; X86-NEXT: cmovael %ecx, %ebp +; X86-NEXT: cmovbl %edx, %ebp ; X86-NEXT: shldl $31, %eax, %ebp -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: cmpl $2, %ebx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: cmovael %esi, %eax -; X86-NEXT: cmpl $1, %ebx -; X86-NEXT: cmovbl %ebx, %ecx -; X86-NEXT: shldl $31, %eax, %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %ecx, 12(%eax) -; X86-NEXT: movl %ebp, 8(%eax) -; X86-NEXT: movl %edi, 4(%eax) -; X86-NEXT: movl %edx, (%eax) -; X86-NEXT: addl $12, %esp +; X86-NEXT: movl %ebp, 12(%eax) +; X86-NEXT: movl %edi, 8(%eax) +; X86-NEXT: movl %ebx, 4(%eax) +; X86-NEXT: movl %esi, (%eax) +; X86-NEXT: addl $8, %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx diff --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll b/llvm/test/CodeGen/X86/vector-fshr-128.ll --- a/llvm/test/CodeGen/X86/vector-fshr-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll @@ -36,7 +36,7 @@ ; SSE2-NEXT: psrlq %xmm4, %xmm1 ; SSE2-NEXT: shufpd {{.*#+}} xmm5 = xmm5[0],xmm1[1] ; SSE2-NEXT: pandn %xmm3, %xmm2 -; SSE2-NEXT: psllq $1, %xmm0 +; SSE2-NEXT: paddq %xmm0, %xmm0 ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psllq %xmm2, %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] @@ -56,12 +56,12 @@ ; SSE41-NEXT: psrlq %xmm4, %xmm1 ; SSE41-NEXT: pblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],xmm1[4,5,6,7] ; SSE41-NEXT: pandn %xmm3, %xmm2 -; SSE41-NEXT: psllq $1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: psllq %xmm2, %xmm1 -; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3] +; SSE41-NEXT: paddq %xmm0, %xmm0 +; SSE41-NEXT: movdqa %xmm0, %xmm3 +; SSE41-NEXT: psllq %xmm1, %xmm3 ; SSE41-NEXT: psllq %xmm2, %xmm0 -; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7] ; SSE41-NEXT: por 
%xmm5, %xmm0 ; SSE41-NEXT: retq ; @@ -74,11 +74,11 @@ ; AVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm5[0,1,2,3],xmm1[4,5,6,7] ; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpsllq $1, %xmm0, %xmm0 -; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm3 -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] +; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[2,3,2,3] +; AVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vpsllq %xmm3, %xmm0, %xmm3 ; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm3[0,1,2,3],xmm0[4,5,6,7] +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7] ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq ; @@ -88,7 +88,7 @@ ; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX2-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1 ; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX2-NEXT: vpsllq $1, %xmm0, %xmm0 +; AVX2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 ; AVX2-NEXT: vpsllvq %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq @@ -99,7 +99,7 @@ ; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512F-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1 ; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512F-NEXT: vpsllq $1, %xmm0, %xmm0 +; AVX512F-NEXT: vpaddq %xmm0, %xmm0, %xmm0 ; AVX512F-NEXT: vpsllvq %xmm2, %xmm0, %xmm0 ; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: retq @@ -110,7 +110,7 @@ ; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VL-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1 ; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VL-NEXT: vpsllq $1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vpsllvq %xmm2, %xmm0, %xmm0 ; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: retq @@ -121,7 +121,7 @@ ; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512BW-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1 ; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512BW-NEXT: vpsllq $1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpaddq %xmm0, %xmm0, %xmm0 ; AVX512BW-NEXT: vpsllvq %xmm2, %xmm0, %xmm0 ; AVX512BW-NEXT: 
vpor %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: retq @@ -142,7 +142,7 @@ ; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VLBW-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1 ; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VLBW-NEXT: vpsllq $1, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpaddq %xmm0, %xmm0, %xmm0 ; AVX512VLBW-NEXT: vpsllvq %xmm2, %xmm0, %xmm0 ; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512VLBW-NEXT: retq @@ -156,13 +156,13 @@ ; XOPAVX1-LABEL: var_funnnel_v2i64: ; XOPAVX1: # %bb.0: ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] -; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4 -; XOPAVX1-NEXT: vpsllq $1, %xmm0, %xmm0 -; XOPAVX1-NEXT: vpshlq %xmm4, %xmm0, %xmm0 -; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 -; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; XOPAVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm2 -; XOPAVX1-NEXT: vpshlq %xmm2, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4 +; XOPAVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5 +; XOPAVX1-NEXT: vpsubq %xmm4, %xmm5, %xmm4 +; XOPAVX1-NEXT: vpshlq %xmm4, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2 +; XOPAVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpshlq %xmm2, %xmm0, %xmm0 ; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: retq ; @@ -172,7 +172,7 @@ ; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm4 ; XOPAVX2-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1 ; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; XOPAVX2-NEXT: vpsllq $1, %xmm0, %xmm0 +; XOPAVX2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 ; XOPAVX2-NEXT: vpsllvq %xmm2, %xmm0, %xmm0 ; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 ; XOPAVX2-NEXT: retq @@ -188,7 +188,7 @@ ; X86-SSE2-NEXT: psrlq %xmm5, %xmm1 ; X86-SSE2-NEXT: shufpd {{.*#+}} xmm3 = xmm3[0],xmm1[1] ; X86-SSE2-NEXT: pandn %xmm4, %xmm2 -; X86-SSE2-NEXT: psllq $1, %xmm0 +; X86-SSE2-NEXT: paddq %xmm0, %xmm0 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 ; X86-SSE2-NEXT: psllq %xmm2, %xmm1 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] @@ -225,7 +225,7 @@ ; SSE2-NEXT: pslld $23, %xmm2 ; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), 
%xmm2 ; SSE2-NEXT: cvttps2dq %xmm2, %xmm1 -; SSE2-NEXT: pslld $1, %xmm0 +; SSE2-NEXT: paddd %xmm0, %xmm0 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] ; SSE2-NEXT: pmuludq %xmm1, %xmm0 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] @@ -260,7 +260,7 @@ ; SSE41-NEXT: pslld $23, %xmm2 ; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 ; SSE41-NEXT: cvttps2dq %xmm2, %xmm1 -; SSE41-NEXT: pslld $1, %xmm0 +; SSE41-NEXT: paddd %xmm0, %xmm0 ; SSE41-NEXT: pmulld %xmm1, %xmm0 ; SSE41-NEXT: por %xmm6, %xmm0 ; SSE41-NEXT: retq @@ -285,7 +285,7 @@ ; AVX1-NEXT: vpslld $23, %xmm2, %xmm2 ; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 ; AVX1-NEXT: vcvttps2dq %xmm2, %xmm2 -; AVX1-NEXT: vpslld $1, %xmm0, %xmm0 +; AVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; AVX1-NEXT: vpmulld %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq @@ -296,7 +296,7 @@ ; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX2-NEXT: vpsrlvd %xmm4, %xmm1, %xmm1 ; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX2-NEXT: vpslld $1, %xmm0, %xmm0 +; AVX2-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; AVX2-NEXT: vpsllvd %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq @@ -307,7 +307,7 @@ ; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512F-NEXT: vpsrlvd %xmm4, %xmm1, %xmm1 ; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512F-NEXT: vpslld $1, %xmm0, %xmm0 +; AVX512F-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; AVX512F-NEXT: vpsllvd %xmm2, %xmm0, %xmm0 ; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: retq @@ -318,7 +318,7 @@ ; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VL-NEXT: vpsrlvd %xmm4, %xmm1, %xmm1 ; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VL-NEXT: vpslld $1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vpsllvd %xmm2, %xmm0, %xmm0 ; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: retq @@ -329,7 +329,7 @@ ; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512BW-NEXT: vpsrlvd %xmm4, %xmm1, %xmm1 ; AVX512BW-NEXT: 
vpandn %xmm3, %xmm2, %xmm2 -; AVX512BW-NEXT: vpslld $1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; AVX512BW-NEXT: vpsllvd %xmm2, %xmm0, %xmm0 ; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: retq @@ -350,7 +350,7 @@ ; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VLBW-NEXT: vpsrlvd %xmm4, %xmm1, %xmm1 ; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VLBW-NEXT: vpslld $1, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; AVX512VLBW-NEXT: vpsllvd %xmm2, %xmm0, %xmm0 ; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512VLBW-NEXT: retq @@ -364,13 +364,13 @@ ; XOPAVX1-LABEL: var_funnnel_v4i32: ; XOPAVX1: # %bb.0: ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [31,31,31,31] -; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4 -; XOPAVX1-NEXT: vpslld $1, %xmm0, %xmm0 -; XOPAVX1-NEXT: vpshld %xmm4, %xmm0, %xmm0 -; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 -; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; XOPAVX1-NEXT: vpsubd %xmm2, %xmm3, %xmm2 -; XOPAVX1-NEXT: vpshld %xmm2, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4 +; XOPAVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5 +; XOPAVX1-NEXT: vpsubd %xmm4, %xmm5, %xmm4 +; XOPAVX1-NEXT: vpshld %xmm4, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2 +; XOPAVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpshld %xmm2, %xmm0, %xmm0 ; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: retq ; @@ -380,7 +380,7 @@ ; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm4 ; XOPAVX2-NEXT: vpsrlvd %xmm4, %xmm1, %xmm1 ; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; XOPAVX2-NEXT: vpslld $1, %xmm0, %xmm0 +; XOPAVX2-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; XOPAVX2-NEXT: vpsllvd %xmm2, %xmm0, %xmm0 ; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 ; XOPAVX2-NEXT: retq @@ -409,7 +409,7 @@ ; X86-SSE2-NEXT: pslld $23, %xmm2 ; X86-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 ; X86-SSE2-NEXT: cvttps2dq %xmm2, %xmm1 -; X86-SSE2-NEXT: pslld $1, %xmm0 +; X86-SSE2-NEXT: paddd %xmm0, %xmm0 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = 
xmm0[1,1,3,3] ; X86-SSE2-NEXT: pmuludq %xmm1, %xmm0 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] @@ -473,7 +473,7 @@ ; SSE2-NEXT: pslld $16, %xmm2 ; SSE2-NEXT: psrad $16, %xmm2 ; SSE2-NEXT: packssdw %xmm4, %xmm2 -; SSE2-NEXT: psllw $1, %xmm0 +; SSE2-NEXT: paddw %xmm0, %xmm0 ; SSE2-NEXT: pmullw %xmm2, %xmm0 ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: por %xmm3, %xmm0 @@ -519,7 +519,7 @@ ; SSE41-NEXT: paddd %xmm4, %xmm0 ; SSE41-NEXT: cvttps2dq %xmm0, %xmm0 ; SSE41-NEXT: packusdw %xmm2, %xmm0 -; SSE41-NEXT: psllw $1, %xmm3 +; SSE41-NEXT: paddw %xmm3, %xmm3 ; SSE41-NEXT: pmullw %xmm0, %xmm3 ; SSE41-NEXT: por %xmm1, %xmm3 ; SSE41-NEXT: movdqa %xmm3, %xmm0 @@ -554,7 +554,7 @@ ; AVX1-NEXT: vpaddd %xmm4, %xmm2, %xmm2 ; AVX1-NEXT: vcvttps2dq %xmm2, %xmm2 ; AVX1-NEXT: vpackusdw %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX1-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX1-NEXT: vpmullw %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq @@ -608,7 +608,7 @@ ; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1 ; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512BW-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vzeroupper @@ -630,7 +630,7 @@ ; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VLBW-NEXT: vpsrlvw %xmm4, %xmm1, %xmm1 ; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VLBW-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX512VLBW-NEXT: vpsllvw %xmm2, %xmm0, %xmm0 ; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512VLBW-NEXT: retq @@ -644,13 +644,13 @@ ; XOP-LABEL: var_funnnel_v8i16: ; XOP: # %bb.0: ; XOP-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15] -; XOP-NEXT: vpandn %xmm3, %xmm2, %xmm4 -; XOP-NEXT: vpsllw $1, %xmm0, %xmm0 -; XOP-NEXT: vpshlw %xmm4, %xmm0, %xmm0 -; XOP-NEXT: vpand %xmm3, %xmm2, %xmm2 -; 
XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; XOP-NEXT: vpsubw %xmm2, %xmm3, %xmm2 -; XOP-NEXT: vpshlw %xmm2, %xmm1, %xmm1 +; XOP-NEXT: vpand %xmm3, %xmm2, %xmm4 +; XOP-NEXT: vpxor %xmm5, %xmm5, %xmm5 +; XOP-NEXT: vpsubw %xmm4, %xmm5, %xmm4 +; XOP-NEXT: vpshlw %xmm4, %xmm1, %xmm1 +; XOP-NEXT: vpandn %xmm3, %xmm2, %xmm2 +; XOP-NEXT: vpaddw %xmm0, %xmm0, %xmm0 +; XOP-NEXT: vpshlw %xmm2, %xmm0, %xmm0 ; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0 ; XOP-NEXT: retq ; @@ -703,7 +703,7 @@ ; X86-SSE2-NEXT: pslld $16, %xmm2 ; X86-SSE2-NEXT: psrad $16, %xmm2 ; X86-SSE2-NEXT: packssdw %xmm4, %xmm2 -; X86-SSE2-NEXT: psllw $1, %xmm0 +; X86-SSE2-NEXT: paddw %xmm0, %xmm0 ; X86-SSE2-NEXT: pmullw %xmm2, %xmm0 ; X86-SSE2-NEXT: por %xmm1, %xmm0 ; X86-SSE2-NEXT: por %xmm3, %xmm0 @@ -1036,7 +1036,7 @@ ; SSE-NEXT: pand %xmm3, %xmm4 ; SSE-NEXT: psrlq %xmm4, %xmm1 ; SSE-NEXT: pandn %xmm3, %xmm2 -; SSE-NEXT: psllq $1, %xmm0 +; SSE-NEXT: paddq %xmm0, %xmm0 ; SSE-NEXT: psllq %xmm2, %xmm0 ; SSE-NEXT: por %xmm1, %xmm0 ; SSE-NEXT: retq @@ -1047,7 +1047,7 @@ ; AVX-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 ; AVX-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX-NEXT: vpsllq $1, %xmm0, %xmm0 +; AVX-NEXT: vpaddq %xmm0, %xmm0, %xmm0 ; AVX-NEXT: vpsllq %xmm2, %xmm0, %xmm0 ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq @@ -1058,7 +1058,7 @@ ; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512F-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 ; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512F-NEXT: vpsllq $1, %xmm0, %xmm0 +; AVX512F-NEXT: vpaddq %xmm0, %xmm0, %xmm0 ; AVX512F-NEXT: vpsllq %xmm2, %xmm0, %xmm0 ; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: retq @@ -1069,7 +1069,7 @@ ; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VL-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 ; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VL-NEXT: vpsllq $1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vpsllq %xmm2, %xmm0, %xmm0 ; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: 
retq @@ -1080,7 +1080,7 @@ ; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512BW-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 ; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512BW-NEXT: vpsllq $1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpaddq %xmm0, %xmm0, %xmm0 ; AVX512BW-NEXT: vpsllq %xmm2, %xmm0, %xmm0 ; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: retq @@ -1101,7 +1101,7 @@ ; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VLBW-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 ; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VLBW-NEXT: vpsllq $1, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpaddq %xmm0, %xmm0, %xmm0 ; AVX512VLBW-NEXT: vpsllq %xmm2, %xmm0, %xmm0 ; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512VLBW-NEXT: retq @@ -1119,7 +1119,7 @@ ; XOP-NEXT: vpand %xmm3, %xmm2, %xmm4 ; XOP-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 ; XOP-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; XOP-NEXT: vpsllq $1, %xmm0, %xmm0 +; XOP-NEXT: vpaddq %xmm0, %xmm0, %xmm0 ; XOP-NEXT: vpsllq %xmm2, %xmm0, %xmm0 ; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0 ; XOP-NEXT: retq @@ -1131,7 +1131,7 @@ ; X86-SSE2-NEXT: pand %xmm3, %xmm4 ; X86-SSE2-NEXT: psrlq %xmm4, %xmm1 ; X86-SSE2-NEXT: pandn %xmm3, %xmm2 -; X86-SSE2-NEXT: psllq $1, %xmm0 +; X86-SSE2-NEXT: paddq %xmm0, %xmm0 ; X86-SSE2-NEXT: psllq %xmm2, %xmm0 ; X86-SSE2-NEXT: por %xmm1, %xmm0 ; X86-SSE2-NEXT: retl @@ -1256,7 +1256,7 @@ ; SSE-NEXT: pand %xmm3, %xmm4 ; SSE-NEXT: psrlw %xmm4, %xmm1 ; SSE-NEXT: pandn %xmm3, %xmm2 -; SSE-NEXT: psllw $1, %xmm0 +; SSE-NEXT: paddw %xmm0, %xmm0 ; SSE-NEXT: psllw %xmm2, %xmm0 ; SSE-NEXT: por %xmm1, %xmm0 ; SSE-NEXT: retq @@ -1267,7 +1267,7 @@ ; AVX-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 ; AVX-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX-NEXT: vpsllw %xmm2, %xmm0, %xmm0 ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq @@ -1278,7 +1278,7 @@ ; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512F-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 ; AVX512F-NEXT: 
vpandn %xmm3, %xmm2, %xmm2 -; AVX512F-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX512F-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX512F-NEXT: vpsllw %xmm2, %xmm0, %xmm0 ; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: retq @@ -1289,7 +1289,7 @@ ; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VL-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 ; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VL-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vpsllw %xmm2, %xmm0, %xmm0 ; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: retq @@ -1300,7 +1300,7 @@ ; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512BW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 ; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512BW-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX512BW-NEXT: vpsllw %xmm2, %xmm0, %xmm0 ; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: retq @@ -1321,7 +1321,7 @@ ; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VLBW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 ; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VLBW-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX512VLBW-NEXT: vpsllw %xmm2, %xmm0, %xmm0 ; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512VLBW-NEXT: retq @@ -1339,7 +1339,7 @@ ; XOP-NEXT: vpand %xmm3, %xmm2, %xmm4 ; XOP-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 ; XOP-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; XOP-NEXT: vpsllw $1, %xmm0, %xmm0 +; XOP-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; XOP-NEXT: vpsllw %xmm2, %xmm0, %xmm0 ; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0 ; XOP-NEXT: retq @@ -1351,7 +1351,7 @@ ; X86-SSE2-NEXT: pand %xmm3, %xmm4 ; X86-SSE2-NEXT: psrlw %xmm4, %xmm1 ; X86-SSE2-NEXT: pandn %xmm3, %xmm2 -; X86-SSE2-NEXT: psllw $1, %xmm0 +; X86-SSE2-NEXT: paddw %xmm0, %xmm0 ; X86-SSE2-NEXT: psllw %xmm2, %xmm0 ; X86-SSE2-NEXT: por %xmm1, %xmm0 ; X86-SSE2-NEXT: retl @@ -1761,7 +1761,7 @@ ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,65535,65535,65535,65535,65535,65535,65535] ; SSE2-NEXT: pandn 
%xmm1, %xmm2 ; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE2-NEXT: psllw $1, %xmm0 +; SSE2-NEXT: paddw %xmm0, %xmm0 ; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE2-NEXT: por %xmm2, %xmm0 ; SSE2-NEXT: por %xmm1, %xmm0 @@ -1772,7 +1772,7 @@ ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = ; SSE41-NEXT: pmulhuw %xmm1, %xmm2 ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3,4,5,6,7] -; SSE41-NEXT: psllw $1, %xmm0 +; SSE41-NEXT: paddw %xmm0, %xmm0 ; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE41-NEXT: por %xmm2, %xmm0 ; SSE41-NEXT: retq @@ -1781,7 +1781,7 @@ ; AVX: # %bb.0: ; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2 ; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7] -; AVX-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq @@ -1790,7 +1790,7 @@ ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2 ; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7] -; AVX512F-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX512F-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: retq @@ -1799,7 +1799,7 @@ ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2 ; AVX512VL-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7] -; AVX512VL-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: retq @@ -1810,7 +1810,7 @@ ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7] ; AVX512BW-NEXT: vpsrlvw %zmm2, %zmm1, %zmm1 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,14,13,12,11,10,9,8] -; AVX512BW-NEXT: vpsllw $1, %xmm0, %xmm0 +; 
AVX512BW-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vzeroupper @@ -1829,7 +1829,7 @@ ; AVX512VLBW-LABEL: constant_funnnel_v8i16: ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512VLBW-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX512VLBW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512VLBW-NEXT: retq @@ -1843,7 +1843,7 @@ ; XOP-LABEL: constant_funnnel_v8i16: ; XOP: # %bb.0: ; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; XOP-NEXT: vpsllw $1, %xmm0, %xmm0 +; XOP-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0 ; XOP-NEXT: retq @@ -1853,7 +1853,7 @@ ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,65535,65535,65535,65535,65535,65535,65535] ; X86-SSE2-NEXT: pandn %xmm1, %xmm2 ; X86-SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 -; X86-SSE2-NEXT: psllw $1, %xmm0 +; X86-SSE2-NEXT: paddw %xmm0, %xmm0 ; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 ; X86-SSE2-NEXT: por %xmm2, %xmm0 ; X86-SSE2-NEXT: por %xmm1, %xmm0 diff --git a/llvm/test/CodeGen/X86/vector-fshr-256.ll b/llvm/test/CodeGen/X86/vector-fshr-256.ll --- a/llvm/test/CodeGen/X86/vector-fshr-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-256.ll @@ -37,17 +37,17 @@ ; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm1, %ymm1 ; AVX1-NEXT: vandnps %ymm3, %ymm2, %ymm2 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 -; AVX1-NEXT: vpsllq $1, %xmm4, %xmm4 -; AVX1-NEXT: vpsllq %xmm3, %xmm4, %xmm5 -; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3] -; AVX1-NEXT: vpsllq %xmm3, %xmm4, %xmm3 -; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm5[0,1,2,3],xmm3[4,5,6,7] -; AVX1-NEXT: vpsllq $1, %xmm0, %xmm0 -; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm4 -; AVX1-NEXT: 
vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] +; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3] +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5 +; AVX1-NEXT: vpaddq %xmm5, %xmm5, %xmm5 +; AVX1-NEXT: vpsllq %xmm4, %xmm5, %xmm4 +; AVX1-NEXT: vpsllq %xmm3, %xmm5, %xmm3 +; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm4[4,5,6,7] +; AVX1-NEXT: vpermilps {{.*#+}} xmm4 = xmm2[2,3,2,3] +; AVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vpsllq %xmm4, %xmm0, %xmm4 ; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm4[0,1,2,3],xmm0[4,5,6,7] +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm4[4,5,6,7] ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 ; AVX1-NEXT: retq @@ -58,7 +58,7 @@ ; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm4 ; AVX2-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1 ; AVX2-NEXT: vpandn %ymm3, %ymm2, %ymm2 -; AVX2-NEXT: vpsllq $1, %ymm0, %ymm0 +; AVX2-NEXT: vpaddq %ymm0, %ymm0, %ymm0 ; AVX2-NEXT: vpsllvq %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq @@ -69,7 +69,7 @@ ; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm4 ; AVX512F-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1 ; AVX512F-NEXT: vpandn %ymm3, %ymm2, %ymm2 -; AVX512F-NEXT: vpsllq $1, %ymm0, %ymm0 +; AVX512F-NEXT: vpaddq %ymm0, %ymm0, %ymm0 ; AVX512F-NEXT: vpsllvq %ymm2, %ymm0, %ymm0 ; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: retq @@ -80,7 +80,7 @@ ; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm4 ; AVX512VL-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1 ; AVX512VL-NEXT: vpandn %ymm3, %ymm2, %ymm2 -; AVX512VL-NEXT: vpsllq $1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpaddq %ymm0, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsllvq %ymm2, %ymm0, %ymm0 ; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq @@ -91,7 +91,7 @@ ; AVX512BW-NEXT: vpand %ymm3, %ymm2, %ymm4 ; AVX512BW-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1 ; AVX512BW-NEXT: vpandn %ymm3, %ymm2, %ymm2 -; AVX512BW-NEXT: vpsllq $1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpaddq %ymm0, %ymm0, %ymm0 ; AVX512BW-NEXT: vpsllvq 
%ymm2, %ymm0, %ymm0 ; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512BW-NEXT: retq @@ -111,7 +111,7 @@ ; AVX512VLBW-NEXT: vpand %ymm3, %ymm2, %ymm4 ; AVX512VLBW-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1 ; AVX512VLBW-NEXT: vpandn %ymm3, %ymm2, %ymm2 -; AVX512VLBW-NEXT: vpsllq $1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpaddq %ymm0, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpsllvq %ymm2, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq @@ -125,23 +125,23 @@ ; XOPAVX1-LABEL: var_funnnel_v4i64: ; XOPAVX1: # %bb.0: ; XOPAVX1-NEXT: vmovaps {{.*#+}} ymm3 = [63,63,63,63] -; XOPAVX1-NEXT: vandnps %ymm3, %ymm2, %ymm4 +; XOPAVX1-NEXT: vandps %ymm3, %ymm2, %ymm4 ; XOPAVX1-NEXT: vextractf128 $1, %ymm4, %xmm5 -; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm6 -; XOPAVX1-NEXT: vpsllq $1, %xmm6, %xmm6 -; XOPAVX1-NEXT: vpshlq %xmm5, %xmm6, %xmm5 -; XOPAVX1-NEXT: vpsllq $1, %xmm0, %xmm0 -; XOPAVX1-NEXT: vpshlq %xmm4, %xmm0, %xmm0 -; XOPAVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm0 -; XOPAVX1-NEXT: vandps %ymm3, %ymm2, %ymm2 +; XOPAVX1-NEXT: vpxor %xmm6, %xmm6, %xmm6 +; XOPAVX1-NEXT: vpsubq %xmm5, %xmm6, %xmm5 +; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm7 +; XOPAVX1-NEXT: vpshlq %xmm5, %xmm7, %xmm5 +; XOPAVX1-NEXT: vpsubq %xmm4, %xmm6, %xmm4 +; XOPAVX1-NEXT: vpshlq %xmm4, %xmm1, %xmm1 +; XOPAVX1-NEXT: vinsertf128 $1, %xmm5, %ymm1, %ymm1 +; XOPAVX1-NEXT: vandnps %ymm3, %ymm2, %ymm2 ; XOPAVX1-NEXT: vextractf128 $1, %ymm2, %xmm3 -; XOPAVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; XOPAVX1-NEXT: vpsubq %xmm3, %xmm4, %xmm3 -; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 -; XOPAVX1-NEXT: vpshlq %xmm3, %xmm5, %xmm3 -; XOPAVX1-NEXT: vpsubq %xmm2, %xmm4, %xmm2 -; XOPAVX1-NEXT: vpshlq %xmm2, %xmm1, %xmm1 -; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 +; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 +; XOPAVX1-NEXT: vpaddq %xmm4, %xmm4, %xmm4 +; XOPAVX1-NEXT: vpshlq %xmm3, %xmm4, %xmm3 +; XOPAVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpshlq %xmm2, %xmm0, %xmm0 +; XOPAVX1-NEXT: 
vinsertf128 $1, %xmm3, %ymm0, %ymm0 ; XOPAVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 ; XOPAVX1-NEXT: retq ; @@ -151,7 +151,7 @@ ; XOPAVX2-NEXT: vpand %ymm3, %ymm2, %ymm4 ; XOPAVX2-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1 ; XOPAVX2-NEXT: vpandn %ymm3, %ymm2, %ymm2 -; XOPAVX2-NEXT: vpsllq $1, %ymm0, %ymm0 +; XOPAVX2-NEXT: vpaddq %ymm0, %ymm0, %ymm0 ; XOPAVX2-NEXT: vpsllvq %ymm2, %ymm0, %ymm0 ; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq @@ -184,7 +184,7 @@ ; AVX1-NEXT: vpaddd %xmm6, %xmm4, %xmm4 ; AVX1-NEXT: vcvttps2dq %xmm4, %xmm4 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm7 -; AVX1-NEXT: vpslld $1, %xmm7, %xmm7 +; AVX1-NEXT: vpaddd %xmm7, %xmm7, %xmm7 ; AVX1-NEXT: vpmulld %xmm4, %xmm7, %xmm4 ; AVX1-NEXT: vpor %xmm3, %xmm4, %xmm3 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm4 = xmm2[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -202,7 +202,7 @@ ; AVX1-NEXT: vpslld $23, %xmm2, %xmm2 ; AVX1-NEXT: vpaddd %xmm6, %xmm2, %xmm2 ; AVX1-NEXT: vcvttps2dq %xmm2, %xmm2 -; AVX1-NEXT: vpslld $1, %xmm0, %xmm0 +; AVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; AVX1-NEXT: vpmulld %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 @@ -214,7 +214,7 @@ ; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm4 ; AVX2-NEXT: vpsrlvd %ymm4, %ymm1, %ymm1 ; AVX2-NEXT: vpandn %ymm3, %ymm2, %ymm2 -; AVX2-NEXT: vpslld $1, %ymm0, %ymm0 +; AVX2-NEXT: vpaddd %ymm0, %ymm0, %ymm0 ; AVX2-NEXT: vpsllvd %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq @@ -225,7 +225,7 @@ ; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm4 ; AVX512F-NEXT: vpsrlvd %ymm4, %ymm1, %ymm1 ; AVX512F-NEXT: vpandn %ymm3, %ymm2, %ymm2 -; AVX512F-NEXT: vpslld $1, %ymm0, %ymm0 +; AVX512F-NEXT: vpaddd %ymm0, %ymm0, %ymm0 ; AVX512F-NEXT: vpsllvd %ymm2, %ymm0, %ymm0 ; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: retq @@ -236,7 +236,7 @@ ; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm4 ; AVX512VL-NEXT: vpsrlvd %ymm4, %ymm1, %ymm1 ; AVX512VL-NEXT: vpandn %ymm3, %ymm2, 
%ymm2 -; AVX512VL-NEXT: vpslld $1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpaddd %ymm0, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsllvd %ymm2, %ymm0, %ymm0 ; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq @@ -247,7 +247,7 @@ ; AVX512BW-NEXT: vpand %ymm3, %ymm2, %ymm4 ; AVX512BW-NEXT: vpsrlvd %ymm4, %ymm1, %ymm1 ; AVX512BW-NEXT: vpandn %ymm3, %ymm2, %ymm2 -; AVX512BW-NEXT: vpslld $1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpaddd %ymm0, %ymm0, %ymm0 ; AVX512BW-NEXT: vpsllvd %ymm2, %ymm0, %ymm0 ; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512BW-NEXT: retq @@ -267,7 +267,7 @@ ; AVX512VLBW-NEXT: vpand %ymm3, %ymm2, %ymm4 ; AVX512VLBW-NEXT: vpsrlvd %ymm4, %ymm1, %ymm1 ; AVX512VLBW-NEXT: vpandn %ymm3, %ymm2, %ymm2 -; AVX512VLBW-NEXT: vpslld $1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpaddd %ymm0, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpsllvd %ymm2, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq @@ -289,13 +289,13 @@ ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [31,31,31,31] ; XOPAVX1-NEXT: vpxor %xmm6, %xmm3, %xmm3 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm7 -; XOPAVX1-NEXT: vpslld $1, %xmm7, %xmm7 +; XOPAVX1-NEXT: vpaddd %xmm7, %xmm7, %xmm7 ; XOPAVX1-NEXT: vpshld %xmm3, %xmm7, %xmm3 ; XOPAVX1-NEXT: vpor %xmm5, %xmm3, %xmm3 ; XOPAVX1-NEXT: vpsubd %xmm2, %xmm4, %xmm4 ; XOPAVX1-NEXT: vpshld %xmm4, %xmm1, %xmm1 ; XOPAVX1-NEXT: vpxor %xmm6, %xmm2, %xmm2 -; XOPAVX1-NEXT: vpslld $1, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; XOPAVX1-NEXT: vpshld %xmm2, %xmm0, %xmm0 ; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 @@ -307,7 +307,7 @@ ; XOPAVX2-NEXT: vpand %ymm3, %ymm2, %ymm4 ; XOPAVX2-NEXT: vpsrlvd %ymm4, %ymm1, %ymm1 ; XOPAVX2-NEXT: vpandn %ymm3, %ymm2, %ymm2 -; XOPAVX2-NEXT: vpslld $1, %ymm0, %ymm0 +; XOPAVX2-NEXT: vpaddd %ymm0, %ymm0, %ymm0 ; XOPAVX2-NEXT: vpsllvd %ymm2, %ymm0, %ymm0 ; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq @@ -348,7 +348,7 @@ ; AVX1-NEXT: vcvttps2dq %xmm6, %xmm6 ; 
AVX1-NEXT: vpackusdw %xmm7, %xmm6, %xmm6 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm7 -; AVX1-NEXT: vpsllw $1, %xmm7, %xmm7 +; AVX1-NEXT: vpaddw %xmm7, %xmm7, %xmm7 ; AVX1-NEXT: vpmullw %xmm6, %xmm7, %xmm6 ; AVX1-NEXT: vpor %xmm4, %xmm6, %xmm4 ; AVX1-NEXT: vpsllw $12, %xmm2, %xmm6 @@ -375,7 +375,7 @@ ; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vcvttps2dq %xmm2, %xmm2 ; AVX1-NEXT: vpackusdw %xmm5, %xmm2, %xmm2 -; AVX1-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX1-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX1-NEXT: vpmullw %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 @@ -427,7 +427,7 @@ ; AVX512BW-NEXT: vpand %ymm3, %ymm2, %ymm4 ; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1 ; AVX512BW-NEXT: vpandn %ymm3, %ymm2, %ymm2 -; AVX512BW-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512BW-NEXT: retq @@ -447,7 +447,7 @@ ; AVX512VLBW-NEXT: vpand %ymm3, %ymm2, %ymm4 ; AVX512VLBW-NEXT: vpsrlvw %ymm4, %ymm1, %ymm1 ; AVX512VLBW-NEXT: vpandn %ymm3, %ymm2, %ymm2 -; AVX512VLBW-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpsllvw %ymm2, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq @@ -469,13 +469,13 @@ ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [15,15,15,15,15,15,15,15] ; XOPAVX1-NEXT: vpxor %xmm6, %xmm3, %xmm3 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm7 -; XOPAVX1-NEXT: vpsllw $1, %xmm7, %xmm7 +; XOPAVX1-NEXT: vpaddw %xmm7, %xmm7, %xmm7 ; XOPAVX1-NEXT: vpshlw %xmm3, %xmm7, %xmm3 ; XOPAVX1-NEXT: vpor %xmm5, %xmm3, %xmm3 ; XOPAVX1-NEXT: vpsubw %xmm2, %xmm4, %xmm4 ; XOPAVX1-NEXT: vpshlw %xmm4, %xmm1, %xmm1 ; XOPAVX1-NEXT: vpxor %xmm6, %xmm2, %xmm2 -; XOPAVX1-NEXT: vpsllw $1, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; XOPAVX1-NEXT: vpshlw %xmm2, %xmm0, %xmm0 ; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: 
vinsertf128 $1, %xmm3, %ymm0, %ymm0 @@ -484,22 +484,22 @@ ; XOPAVX2-LABEL: var_funnnel_v16i16: ; XOPAVX2: # %bb.0: ; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; XOPAVX2-NEXT: vpandn %ymm3, %ymm2, %ymm4 +; XOPAVX2-NEXT: vpand %ymm3, %ymm2, %ymm4 ; XOPAVX2-NEXT: vextracti128 $1, %ymm4, %xmm5 -; XOPAVX2-NEXT: vpsllw $1, %ymm0, %ymm0 -; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm6 -; XOPAVX2-NEXT: vpshlw %xmm5, %xmm6, %xmm5 -; XOPAVX2-NEXT: vpshlw %xmm4, %xmm0, %xmm0 -; XOPAVX2-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm0 -; XOPAVX2-NEXT: vpand %ymm3, %ymm2, %ymm2 +; XOPAVX2-NEXT: vpxor %xmm6, %xmm6, %xmm6 +; XOPAVX2-NEXT: vpsubw %xmm5, %xmm6, %xmm5 +; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm7 +; XOPAVX2-NEXT: vpshlw %xmm5, %xmm7, %xmm5 +; XOPAVX2-NEXT: vpsubw %xmm4, %xmm6, %xmm4 +; XOPAVX2-NEXT: vpshlw %xmm4, %xmm1, %xmm1 +; XOPAVX2-NEXT: vinserti128 $1, %xmm5, %ymm1, %ymm1 +; XOPAVX2-NEXT: vpandn %ymm3, %ymm2, %ymm2 ; XOPAVX2-NEXT: vextracti128 $1, %ymm2, %xmm3 -; XOPAVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; XOPAVX2-NEXT: vpsubw %xmm3, %xmm4, %xmm3 -; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm5 -; XOPAVX2-NEXT: vpshlw %xmm3, %xmm5, %xmm3 -; XOPAVX2-NEXT: vpsubw %xmm2, %xmm4, %xmm2 -; XOPAVX2-NEXT: vpshlw %xmm2, %xmm1, %xmm1 -; XOPAVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; XOPAVX2-NEXT: vpaddw %ymm0, %ymm0, %ymm0 +; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm4 +; XOPAVX2-NEXT: vpshlw %xmm3, %xmm4, %xmm3 +; XOPAVX2-NEXT: vpshlw %xmm2, %xmm0, %xmm0 +; XOPAVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 ; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq %res = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %amt) @@ -782,9 +782,9 @@ ; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm1, %ymm1 ; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 -; AVX1-NEXT: vpsllq $1, %xmm3, %xmm3 +; AVX1-NEXT: vpaddq %xmm3, %xmm3, %xmm3 ; AVX1-NEXT: vpsllq %xmm2, %xmm3, %xmm3 -; 
AVX1-NEXT: vpsllq $1, %xmm0, %xmm0 +; AVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 ; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 @@ -796,7 +796,7 @@ ; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX2-NEXT: vpsrlq %xmm4, %ymm1, %ymm1 ; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX2-NEXT: vpsllq $1, %ymm0, %ymm0 +; AVX2-NEXT: vpaddq %ymm0, %ymm0, %ymm0 ; AVX2-NEXT: vpsllq %xmm2, %ymm0, %ymm0 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq @@ -807,7 +807,7 @@ ; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512F-NEXT: vpsrlq %xmm4, %ymm1, %ymm1 ; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512F-NEXT: vpsllq $1, %ymm0, %ymm0 +; AVX512F-NEXT: vpaddq %ymm0, %ymm0, %ymm0 ; AVX512F-NEXT: vpsllq %xmm2, %ymm0, %ymm0 ; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: retq @@ -818,7 +818,7 @@ ; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VL-NEXT: vpsrlq %xmm4, %ymm1, %ymm1 ; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VL-NEXT: vpsllq $1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpaddq %ymm0, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsllq %xmm2, %ymm0, %ymm0 ; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq @@ -829,7 +829,7 @@ ; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512BW-NEXT: vpsrlq %xmm4, %ymm1, %ymm1 ; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512BW-NEXT: vpsllq $1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpaddq %ymm0, %ymm0, %ymm0 ; AVX512BW-NEXT: vpsllq %xmm2, %ymm0, %ymm0 ; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512BW-NEXT: retq @@ -849,7 +849,7 @@ ; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VLBW-NEXT: vpsrlq %xmm4, %ymm1, %ymm1 ; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VLBW-NEXT: vpsllq $1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpaddq %ymm0, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpsllq %xmm2, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq @@ -871,9 +871,9 @@ ; XOPAVX1-NEXT: vinsertf128 $1, %xmm5, %ymm1, %ymm1 ; XOPAVX1-NEXT: 
vpandn %xmm3, %xmm2, %xmm2 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 -; XOPAVX1-NEXT: vpsllq $1, %xmm3, %xmm3 +; XOPAVX1-NEXT: vpaddq %xmm3, %xmm3, %xmm3 ; XOPAVX1-NEXT: vpsllq %xmm2, %xmm3, %xmm3 -; XOPAVX1-NEXT: vpsllq $1, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 ; XOPAVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 ; XOPAVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 @@ -885,7 +885,7 @@ ; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm4 ; XOPAVX2-NEXT: vpsrlq %xmm4, %ymm1, %ymm1 ; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; XOPAVX2-NEXT: vpsllq $1, %ymm0, %ymm0 +; XOPAVX2-NEXT: vpaddq %ymm0, %ymm0, %ymm0 ; XOPAVX2-NEXT: vpsllq %xmm2, %ymm0, %ymm0 ; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq @@ -1020,11 +1020,11 @@ ; AVX1-NEXT: vpsrlw %xmm4, %xmm5, %xmm5 ; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 -; AVX1-NEXT: vpsllw $1, %xmm3, %xmm3 +; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3 ; AVX1-NEXT: vpsllw %xmm2, %xmm3, %xmm3 ; AVX1-NEXT: vpor %xmm5, %xmm3, %xmm3 ; AVX1-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 -; AVX1-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX1-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX1-NEXT: vpsllw %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 @@ -1036,7 +1036,7 @@ ; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX2-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 ; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX2-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX2-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm0 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq @@ -1047,7 +1047,7 @@ ; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512F-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 ; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512F-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512F-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm0 ; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: retq @@ -1058,7 +1058,7 @@ ; 
AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VL-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 ; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VL-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm0 ; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq @@ -1069,7 +1069,7 @@ ; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512BW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 ; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512BW-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm0 ; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512BW-NEXT: retq @@ -1089,7 +1089,7 @@ ; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VLBW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 ; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VLBW-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq @@ -1109,11 +1109,11 @@ ; XOPAVX1-NEXT: vpsrlw %xmm4, %xmm5, %xmm5 ; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 -; XOPAVX1-NEXT: vpsllw $1, %xmm3, %xmm3 +; XOPAVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3 ; XOPAVX1-NEXT: vpsllw %xmm2, %xmm3, %xmm3 ; XOPAVX1-NEXT: vpor %xmm5, %xmm3, %xmm3 ; XOPAVX1-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 -; XOPAVX1-NEXT: vpsllw $1, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; XOPAVX1-NEXT: vpsllw %xmm2, %xmm0, %xmm0 ; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 @@ -1125,7 +1125,7 @@ ; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm4 ; XOPAVX2-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 ; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; XOPAVX2-NEXT: vpsllw $1, %ymm0, %ymm0 +; XOPAVX2-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; XOPAVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm0 ; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq @@ -1494,10 +1494,10 @@ ; 
AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 ; AVX1-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 -; AVX1-NEXT: vpsllw $1, %xmm0, %xmm2 +; AVX1-NEXT: vpaddw %xmm0, %xmm0, %xmm2 ; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX1-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 @@ -1508,7 +1508,7 @@ ; AVX2-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2 ; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7] ; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7] -; AVX2-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX2-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq @@ -1518,7 +1518,7 @@ ; AVX512F-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2 ; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7] ; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7] -; AVX512F-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512F-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: retq @@ -1528,7 +1528,7 @@ ; AVX512VL-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2 ; AVX512VL-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7] ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7] -; AVX512VL-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq @@ -1539,7 +1539,7 @@ ; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = 
[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; AVX512BW-NEXT: vpsrlvw %zmm2, %zmm1, %zmm1 ; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0] -; AVX512BW-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512BW-NEXT: retq @@ -1556,7 +1556,7 @@ ; AVX512VLBW-LABEL: constant_funnnel_v16i16: ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512VLBW-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq @@ -1573,10 +1573,10 @@ ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 ; XOPAVX1-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 -; XOPAVX1-NEXT: vpsllw $1, %xmm0, %xmm2 +; XOPAVX1-NEXT: vpaddw %xmm0, %xmm0, %xmm2 ; XOPAVX1-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; XOPAVX1-NEXT: vpsllw $1, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; XOPAVX1-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 ; XOPAVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 @@ -1587,7 +1587,7 @@ ; XOPAVX2-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2 ; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7] ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7] -; XOPAVX2-NEXT: vpsllw $1, %ymm0, %ymm0 +; XOPAVX2-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; XOPAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-fshr-512.ll b/llvm/test/CodeGen/X86/vector-fshr-512.ll --- a/llvm/test/CodeGen/X86/vector-fshr-512.ll +++ 
b/llvm/test/CodeGen/X86/vector-fshr-512.ll @@ -22,7 +22,7 @@ ; AVX512F-NEXT: vpandq %zmm3, %zmm2, %zmm4 ; AVX512F-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1 ; AVX512F-NEXT: vpandnq %zmm3, %zmm2, %zmm2 -; AVX512F-NEXT: vpsllq $1, %zmm0, %zmm0 +; AVX512F-NEXT: vpaddq %zmm0, %zmm0, %zmm0 ; AVX512F-NEXT: vpsllvq %zmm2, %zmm0, %zmm0 ; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: retq @@ -33,7 +33,7 @@ ; AVX512VL-NEXT: vpandq %zmm3, %zmm2, %zmm4 ; AVX512VL-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1 ; AVX512VL-NEXT: vpandnq %zmm3, %zmm2, %zmm2 -; AVX512VL-NEXT: vpsllq $1, %zmm0, %zmm0 +; AVX512VL-NEXT: vpaddq %zmm0, %zmm0, %zmm0 ; AVX512VL-NEXT: vpsllvq %zmm2, %zmm0, %zmm0 ; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512VL-NEXT: retq @@ -44,7 +44,7 @@ ; AVX512BW-NEXT: vpandq %zmm3, %zmm2, %zmm4 ; AVX512BW-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1 ; AVX512BW-NEXT: vpandnq %zmm3, %zmm2, %zmm2 -; AVX512BW-NEXT: vpsllq $1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpaddq %zmm0, %zmm0, %zmm0 ; AVX512BW-NEXT: vpsllvq %zmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq @@ -61,7 +61,7 @@ ; AVX512VLBW-NEXT: vpandq %zmm3, %zmm2, %zmm4 ; AVX512VLBW-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1 ; AVX512VLBW-NEXT: vpandnq %zmm3, %zmm2, %zmm2 -; AVX512VLBW-NEXT: vpsllq $1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpaddq %zmm0, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vpsllvq %zmm2, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: retq @@ -82,7 +82,7 @@ ; AVX512F-NEXT: vpandd %zmm3, %zmm2, %zmm4 ; AVX512F-NEXT: vpsrlvd %zmm4, %zmm1, %zmm1 ; AVX512F-NEXT: vpandnd %zmm3, %zmm2, %zmm2 -; AVX512F-NEXT: vpslld $1, %zmm0, %zmm0 +; AVX512F-NEXT: vpaddd %zmm0, %zmm0, %zmm0 ; AVX512F-NEXT: vpsllvd %zmm2, %zmm0, %zmm0 ; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: retq @@ -93,7 +93,7 @@ ; AVX512VL-NEXT: vpandd %zmm3, %zmm2, %zmm4 ; AVX512VL-NEXT: vpsrlvd %zmm4, %zmm1, %zmm1 ; AVX512VL-NEXT: vpandnd %zmm3, %zmm2, %zmm2 -; AVX512VL-NEXT: vpslld $1, %zmm0, %zmm0 +; AVX512VL-NEXT: 
vpaddd %zmm0, %zmm0, %zmm0 ; AVX512VL-NEXT: vpsllvd %zmm2, %zmm0, %zmm0 ; AVX512VL-NEXT: vpord %zmm1, %zmm0, %zmm0 ; AVX512VL-NEXT: retq @@ -104,7 +104,7 @@ ; AVX512BW-NEXT: vpandd %zmm3, %zmm2, %zmm4 ; AVX512BW-NEXT: vpsrlvd %zmm4, %zmm1, %zmm1 ; AVX512BW-NEXT: vpandnd %zmm3, %zmm2, %zmm2 -; AVX512BW-NEXT: vpslld $1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpaddd %zmm0, %zmm0, %zmm0 ; AVX512BW-NEXT: vpsllvd %zmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq @@ -121,7 +121,7 @@ ; AVX512VLBW-NEXT: vpandd %zmm3, %zmm2, %zmm4 ; AVX512VLBW-NEXT: vpsrlvd %zmm4, %zmm1, %zmm1 ; AVX512VLBW-NEXT: vpandnd %zmm3, %zmm2, %zmm2 -; AVX512VLBW-NEXT: vpslld $1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpaddd %zmm0, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vpsllvd %zmm2, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vpord %zmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: retq @@ -188,7 +188,7 @@ ; AVX512BW-NEXT: vpandq %zmm3, %zmm2, %zmm4 ; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1 ; AVX512BW-NEXT: vpandnq %zmm3, %zmm2, %zmm2 -; AVX512BW-NEXT: vpsllw $1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpaddw %zmm0, %zmm0, %zmm0 ; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq @@ -205,7 +205,7 @@ ; AVX512VLBW-NEXT: vpandq %zmm3, %zmm2, %zmm4 ; AVX512VLBW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1 ; AVX512VLBW-NEXT: vpandnq %zmm3, %zmm2, %zmm2 -; AVX512VLBW-NEXT: vpsllw $1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpaddw %zmm0, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: retq @@ -428,7 +428,7 @@ ; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512F-NEXT: vpsrlq %xmm4, %zmm1, %zmm1 ; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512F-NEXT: vpsllq $1, %zmm0, %zmm0 +; AVX512F-NEXT: vpaddq %zmm0, %zmm0, %zmm0 ; AVX512F-NEXT: vpsllq %xmm2, %zmm0, %zmm0 ; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: retq @@ -439,7 +439,7 @@ ; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VL-NEXT: vpsrlq 
%xmm4, %zmm1, %zmm1 ; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VL-NEXT: vpsllq $1, %zmm0, %zmm0 +; AVX512VL-NEXT: vpaddq %zmm0, %zmm0, %zmm0 ; AVX512VL-NEXT: vpsllq %xmm2, %zmm0, %zmm0 ; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512VL-NEXT: retq @@ -450,7 +450,7 @@ ; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512BW-NEXT: vpsrlq %xmm4, %zmm1, %zmm1 ; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512BW-NEXT: vpsllq $1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpaddq %zmm0, %zmm0, %zmm0 ; AVX512BW-NEXT: vpsllq %xmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq @@ -468,7 +468,7 @@ ; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VLBW-NEXT: vpsrlq %xmm4, %zmm1, %zmm1 ; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VLBW-NEXT: vpsllq $1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpaddq %zmm0, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vpsllq %xmm2, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: retq @@ -554,9 +554,9 @@ ; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm1, %zmm1 ; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3 -; AVX512F-NEXT: vpsllw $1, %ymm3, %ymm3 +; AVX512F-NEXT: vpaddw %ymm3, %ymm3, %ymm3 ; AVX512F-NEXT: vpsllw %xmm2, %ymm3, %ymm3 -; AVX512F-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512F-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm0 ; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0 ; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0 @@ -572,9 +572,9 @@ ; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm1, %zmm1 ; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2 ; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3 -; AVX512VL-NEXT: vpsllw $1, %ymm3, %ymm3 +; AVX512VL-NEXT: vpaddw %ymm3, %ymm3, %ymm3 ; AVX512VL-NEXT: vpsllw %xmm2, %ymm3, %ymm3 -; AVX512VL-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm0 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0 ; AVX512VL-NEXT: vporq %zmm1, 
%zmm0, %zmm0 @@ -586,7 +586,7 @@ ; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512BW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1 ; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512BW-NEXT: vpsllw $1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpaddw %zmm0, %zmm0, %zmm0 ; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq @@ -604,7 +604,7 @@ ; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VLBW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1 ; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VLBW-NEXT: vpsllw $1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpaddw %zmm0, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: retq @@ -853,7 +853,7 @@ ; AVX512BW-LABEL: constant_funnnel_v32i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 -; AVX512BW-NEXT: vpsllw $1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpaddw %zmm0, %zmm0, %zmm0 ; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq @@ -867,7 +867,7 @@ ; AVX512VLBW-LABEL: constant_funnnel_v32i16: ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 -; AVX512VLBW-NEXT: vpsllw $1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpaddw %zmm0, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 ; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll --- a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll @@ -963,7 +963,7 @@ ; SSE41-NEXT: movdqa %xmm0, %xmm4 ; SSE41-NEXT: psrlw %xmm3, %xmm4 ; SSE41-NEXT: pandn %xmm2, %xmm1 -; SSE41-NEXT: psllw $1, %xmm0 +; SSE41-NEXT: paddw %xmm0, %xmm0 ; SSE41-NEXT: psllw %xmm1, %xmm0 ; SSE41-NEXT: por %xmm4, %xmm0 ; SSE41-NEXT: retq @@ -974,7 +974,7 @@ ; AVX-NEXT: 
vpand %xmm2, %xmm1, %xmm3 ; AVX-NEXT: vpsrlw %xmm3, %xmm0, %xmm3 ; AVX-NEXT: vpandn %xmm2, %xmm1, %xmm1 -; AVX-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpor %xmm3, %xmm0, %xmm0 ; AVX-NEXT: retq @@ -985,7 +985,7 @@ ; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX512F-NEXT: vpsrlw %xmm3, %xmm0, %xmm3 ; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm1 -; AVX512F-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX512F-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX512F-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: vpor %xmm3, %xmm0, %xmm0 ; AVX512F-NEXT: retq @@ -996,7 +996,7 @@ ; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX512VL-NEXT: vpsrlw %xmm3, %xmm0, %xmm3 ; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm1 -; AVX512VL-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: vpor %xmm3, %xmm0, %xmm0 ; AVX512VL-NEXT: retq @@ -1007,7 +1007,7 @@ ; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX512BW-NEXT: vpsrlw %xmm3, %xmm0, %xmm3 ; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX512BW-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vpor %xmm3, %xmm0, %xmm0 ; AVX512BW-NEXT: retq @@ -1018,7 +1018,7 @@ ; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX512VLBW-NEXT: vpsrlw %xmm3, %xmm0, %xmm3 ; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm1 -; AVX512VLBW-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX512VLBW-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ; AVX512VLBW-NEXT: vpor %xmm3, %xmm0, %xmm0 ; AVX512VLBW-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll --- a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll @@ -789,11 +789,11 @@ ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 ; AVX1-NEXT: vpsrlw %xmm3, %xmm4, %xmm5 ; AVX1-NEXT: 
vpandn %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpsllw $1, %xmm4, %xmm2 +; AVX1-NEXT: vpaddw %xmm4, %xmm4, %xmm2 ; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2 ; AVX1-NEXT: vpor %xmm5, %xmm2, %xmm2 ; AVX1-NEXT: vpsrlw %xmm3, %xmm0, %xmm3 -; AVX1-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX1-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -805,7 +805,7 @@ ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX2-NEXT: vpsrlw %xmm3, %ymm0, %ymm3 ; AVX2-NEXT: vpandn %xmm2, %xmm1, %xmm1 -; AVX2-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX2-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX2-NEXT: vpor %ymm3, %ymm0, %ymm0 ; AVX2-NEXT: retq @@ -816,7 +816,7 @@ ; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX512F-NEXT: vpsrlw %xmm3, %ymm0, %ymm3 ; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm1 -; AVX512F-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512F-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512F-NEXT: vpor %ymm3, %ymm0, %ymm0 ; AVX512F-NEXT: retq @@ -827,7 +827,7 @@ ; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX512VL-NEXT: vpsrlw %xmm3, %ymm0, %ymm3 ; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm1 -; AVX512VL-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: vpor %ymm3, %ymm0, %ymm0 ; AVX512VL-NEXT: retq @@ -838,7 +838,7 @@ ; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX512BW-NEXT: vpsrlw %xmm3, %ymm0, %ymm3 ; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512BW-NEXT: vpor %ymm3, %ymm0, %ymm0 ; AVX512BW-NEXT: retq @@ -849,7 +849,7 @@ ; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX512VLBW-NEXT: vpsrlw %xmm3, %ymm0, %ymm3 ; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm1 -; AVX512VLBW-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: 
vpaddw %ymm0, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpor %ymm3, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll --- a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll @@ -301,9 +301,9 @@ ; AVX512F-NEXT: vpsrlw %xmm3, %ymm0, %ymm3 ; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3 ; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm1 -; AVX512F-NEXT: vpsllw $1, %ymm4, %ymm2 +; AVX512F-NEXT: vpaddw %ymm4, %ymm4, %ymm2 ; AVX512F-NEXT: vpsllw %xmm1, %ymm2, %ymm2 -; AVX512F-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512F-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 ; AVX512F-NEXT: vporq %zmm3, %zmm0, %zmm0 @@ -318,9 +318,9 @@ ; AVX512VL-NEXT: vpsrlw %xmm3, %ymm0, %ymm3 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3 ; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm1 -; AVX512VL-NEXT: vpsllw $1, %ymm4, %ymm2 +; AVX512VL-NEXT: vpaddw %ymm4, %ymm4, %ymm2 ; AVX512VL-NEXT: vpsllw %xmm1, %ymm2, %ymm2 -; AVX512VL-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 ; AVX512VL-NEXT: vporq %zmm3, %zmm0, %zmm0 @@ -332,7 +332,7 @@ ; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX512BW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3 ; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vpsllw $1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpaddw %zmm0, %zmm0, %zmm0 ; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0 ; AVX512BW-NEXT: retq @@ -343,7 +343,7 @@ ; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3 ; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm1 -; AVX512VLBW-NEXT: vpsllw $1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpaddw %zmm0, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vpsllw 
%xmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0 ; AVX512VLBW-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-mulfix-legalize.ll b/llvm/test/CodeGen/X86/vector-mulfix-legalize.ll --- a/llvm/test/CodeGen/X86/vector-mulfix-legalize.ll +++ b/llvm/test/CodeGen/X86/vector-mulfix-legalize.ll @@ -18,7 +18,7 @@ ; CHECK-NEXT: pmullw %xmm1, %xmm2 ; CHECK-NEXT: psrlw $15, %xmm2 ; CHECK-NEXT: pmulhw %xmm1, %xmm0 -; CHECK-NEXT: psllw $1, %xmm0 +; CHECK-NEXT: paddw %xmm0, %xmm0 ; CHECK-NEXT: por %xmm2, %xmm0 ; CHECK-NEXT: retq %t = call <4 x i16> @llvm.smul.fix.v4i16(<4 x i16> , <4 x i16> %a, i32 15) @@ -33,7 +33,7 @@ ; CHECK-NEXT: pmullw %xmm1, %xmm2 ; CHECK-NEXT: psrlw $15, %xmm2 ; CHECK-NEXT: pmulhuw %xmm1, %xmm0 -; CHECK-NEXT: psllw $1, %xmm0 +; CHECK-NEXT: paddw %xmm0, %xmm0 ; CHECK-NEXT: por %xmm2, %xmm0 ; CHECK-NEXT: retq %t = call <4 x i16> @llvm.umul.fix.v4i16(<4 x i16> , <4 x i16> %a, i32 15) diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll --- a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll @@ -927,23 +927,23 @@ ; SSE2-LABEL: constant_shift_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psllq $1, %xmm1 -; SSE2-NEXT: psllq $7, %xmm0 -; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; SSE2-NEXT: psllq $7, %xmm1 +; SSE2-NEXT: paddq %xmm0, %xmm0 +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSE2-NEXT: retq ; ; SSE41-LABEL: constant_shift_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa %xmm0, %xmm1 ; SSE41-NEXT: psllq $7, %xmm1 -; SSE41-NEXT: psllq $1, %xmm0 +; SSE41-NEXT: paddq %xmm0, %xmm0 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] ; SSE41-NEXT: retq ; ; AVX1-LABEL: constant_shift_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vpsllq $7, %xmm0, %xmm1 -; AVX1-NEXT: vpsllq $1, %xmm0, %xmm0 +; AVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 
; AVX1-NEXT: retq ; @@ -975,9 +975,9 @@ ; X86-SSE-LABEL: constant_shift_v2i64: ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: movdqa %xmm0, %xmm1 -; X86-SSE-NEXT: psllq $1, %xmm1 -; X86-SSE-NEXT: psllq $7, %xmm0 -; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; X86-SSE-NEXT: psllq $7, %xmm1 +; X86-SSE-NEXT: paddq %xmm0, %xmm0 +; X86-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; X86-SSE-NEXT: retl %shift = shl <2 x i64> %a, ret <2 x i64> %shift diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll --- a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll +++ b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll @@ -1061,7 +1061,7 @@ ; AVX1-NEXT: vpsllq $31, %xmm1, %xmm1 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7] ; AVX1-NEXT: vpsllq $7, %xmm0, %xmm2 -; AVX1-NEXT: vpsllq $1, %xmm0, %xmm0 +; AVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7] ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq @@ -1101,7 +1101,7 @@ ; X86-AVX1-NEXT: vpsllq $31, %xmm1, %xmm1 ; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7] ; X86-AVX1-NEXT: vpsllq $7, %xmm0, %xmm2 -; X86-AVX1-NEXT: vpsllq $1, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 ; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7] ; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; X86-AVX1-NEXT: retl diff --git a/llvm/test/CodeGen/X86/win64-eh-empty-block-2.mir b/llvm/test/CodeGen/X86/win64-eh-empty-block-2.mir --- a/llvm/test/CodeGen/X86/win64-eh-empty-block-2.mir +++ b/llvm/test/CodeGen/X86/win64-eh-empty-block-2.mir @@ -168,7 +168,7 @@ TEST8rr killed renamable $r8b, renamable $r8b, implicit-def $eflags JCC_1 %bb.6, 5, implicit $eflags - bb.8.return (address-taken): + bb.8.return (machine-block-address-taken): $eax = MOV32rm $rbp, 1, $noreg, -12, $noreg :: (load (s32) from %stack.0) SEH_Epilogue $rsp = frame-destroy ADD64ri8 $rsp, 48, 
implicit-def dead $eflags diff --git a/llvm/test/DebugInfo/COFF/global_visibility.ll b/llvm/test/DebugInfo/COFF/global_visibility.ll --- a/llvm/test/DebugInfo/COFF/global_visibility.ll +++ b/llvm/test/DebugInfo/COFF/global_visibility.ll @@ -53,12 +53,12 @@ ; CHECK: } ; CHECK: DataSym { ; CHECK: Kind: S_LDATA32 (0x110C) -; CHECK: DisplayName: foo::local_int +; CHECK: DisplayName: local_int ; CHECK: LinkageName: ?local_int@?1??foo@@YAXXZ@4HA ; CHECK: } ; CHECK: DataSym { ; CHECK: Kind: S_LDATA32 (0x110C) -; CHECK: DisplayName: foo::nested_int +; CHECK: DisplayName: nested_int ; CHECK: LinkageName: ?nested_int@?1??foo@@YAXXZ@4HA ; CHECK: } ; CHECK: ProcEnd { @@ -74,12 +74,12 @@ ; CHECK: } ; CHECK: DataSym { ; CHECK: Kind: S_LDATA32 (0x110C) -; CHECK: DisplayName: bar::local_int +; CHECK: DisplayName: local_int ; CHECK: LinkageName: ?local_int@?1??bar@@YAXXZ@4HA ; CHECK: } ; CHECK: DataSym { ; CHECK: Kind: S_LDATA32 (0x110C) -; CHECK: DisplayName: bar::nested_int +; CHECK: DisplayName: nested_int ; CHECK: LinkageName: ?nested_int@?1??bar@@YAXXZ@4HA ; CHECK: } ; CHECK: ProcEnd { diff --git a/llvm/test/DebugInfo/MIR/X86/ldv_unreachable_blocks.mir b/llvm/test/DebugInfo/MIR/X86/ldv_unreachable_blocks.mir --- a/llvm/test/DebugInfo/MIR/X86/ldv_unreachable_blocks.mir +++ b/llvm/test/DebugInfo/MIR/X86/ldv_unreachable_blocks.mir @@ -34,5 +34,5 @@ RET64 $al, debug-location !10 bb.1: successors: - bb.25.if.else.i103.i (address-taken): + bb.25.if.else.i103.i (machine-block-address-taken): JMP_1 %bb.28 diff --git a/llvm/test/DebugInfo/MIR/X86/ldv_unreachable_blocks2.mir b/llvm/test/DebugInfo/MIR/X86/ldv_unreachable_blocks2.mir --- a/llvm/test/DebugInfo/MIR/X86/ldv_unreachable_blocks2.mir +++ b/llvm/test/DebugInfo/MIR/X86/ldv_unreachable_blocks2.mir @@ -29,4 +29,4 @@ bb.2: successors: TRAP - bb.21.do.body.i129.i (address-taken): + bb.21.do.body.i129.i (machine-block-address-taken): diff --git a/llvm/test/ExecutionEngine/JITLink/i386/ELF_i386_minimal.s 
b/llvm/test/ExecutionEngine/JITLink/i386/ELF_i386_minimal.s new file mode 100644 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/i386/ELF_i386_minimal.s @@ -0,0 +1,18 @@ +# RUN: llvm-mc -triple=i386-unknown-linux-gnu -position-independent -filetype=obj -o %t.o %s +# RUN: llvm-jitlink -noexec %t.o + + .text + .globl main + .p2align 4 + .type main,@function +main: + pushl %ebp + movl %esp, %ebp + pushl %eax + movl $0, -4(%ebp) + movl $42, %eax + addl $4, %esp + popl %ebp + retl + + .size main, .-main \ No newline at end of file diff --git a/llvm/test/ExecutionEngine/JITLink/i386/lit.local.cfg b/llvm/test/ExecutionEngine/JITLink/i386/lit.local.cfg new file mode 100644 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/i386/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'i386' in config.root.targets: + config.unsupported = True \ No newline at end of file diff --git a/llvm/test/MC/ARM/directive-eabi_attribute-also_compatible_with-integer.s b/llvm/test/MC/ARM/directive-eabi_attribute-also_compatible_with-integer.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/ARM/directive-eabi_attribute-also_compatible_with-integer.s @@ -0,0 +1,10 @@ +@ RUN: llvm-mc -triple arm -filetype obj -o - %s | \ +@ RUN: llvm-readobj -A - | \ +@ RUN: FileCheck %s + +.eabi_attribute Tag_also_compatible_with, "\015\001" +@ CHECK: Attribute +@ CHECK: Tag: 65 +@ CHECK: TagName: also_compatible_with +@ CHECK: Value: \015\001 +@ CHECK: Description: Tag_PCS_config = 1 diff --git a/llvm/test/MC/ARM/directive-eabi_attribute-also_compatible_with-invalid_arch.s b/llvm/test/MC/ARM/directive-eabi_attribute-also_compatible_with-invalid_arch.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/ARM/directive-eabi_attribute-also_compatible_with-invalid_arch.s @@ -0,0 +1,6 @@ +@ RUN: llvm-mc -triple arm -filetype obj -o - %s | \ +@ RUN: llvm-readobj -A - 2>&1 | \ +@ RUN: FileCheck %s --check-prefix=CHECK-WARNING + +.eabi_attribute Tag_also_compatible_with, "\006\143" +@ CHECK-WARNING: 99 is not a valid 
Tag_CPU_arch value diff --git a/llvm/test/MC/ARM/directive-eabi_attribute-also_compatible_with-invalid_tag.s b/llvm/test/MC/ARM/directive-eabi_attribute-also_compatible_with-invalid_tag.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/ARM/directive-eabi_attribute-also_compatible_with-invalid_tag.s @@ -0,0 +1,6 @@ +@ RUN: llvm-mc -triple arm -filetype obj -o - %s | \ +@ RUN: llvm-readobj -A - 2>&1 | \ +@ RUN: FileCheck %s --check-prefix=CHECK-WARNING + +.eabi_attribute Tag_also_compatible_with, "\074\001" +@ CHECK-WARNING: 60 is not a valid tag number diff --git a/llvm/test/MC/ARM/directive-eabi_attribute-also_compatible_with-recursive.s b/llvm/test/MC/ARM/directive-eabi_attribute-also_compatible_with-recursive.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/ARM/directive-eabi_attribute-also_compatible_with-recursive.s @@ -0,0 +1,6 @@ +@ RUN: llvm-mc -triple arm -filetype obj -o - %s | \ +@ RUN: llvm-readobj -A - 2>&1 | \ +@ RUN: FileCheck %s --check-prefix=CHECK-WARNING + +.eabi_attribute Tag_also_compatible_with, "\101\006\017" +@ CHECK-WARNING: Tag_also_compatible_with cannot be recursively defined diff --git a/llvm/test/MC/ARM/directive-eabi_attribute-also_compatible_with-string.s b/llvm/test/MC/ARM/directive-eabi_attribute-also_compatible_with-string.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/ARM/directive-eabi_attribute-also_compatible_with-string.s @@ -0,0 +1,10 @@ +@ RUN: llvm-mc -triple arm -filetype obj -o - %s | \ +@ RUN: llvm-readobj -A - | \ +@ RUN: FileCheck %s + +.eabi_attribute Tag_also_compatible_with, "\005Cortex-A7" +@ CHECK: Attribute +@ CHECK: Tag: 65 +@ CHECK: TagName: also_compatible_with +@ CHECK: Value: \005Cortex-A7 +@ CHECK: Description: Tag_CPU_name = Cortex-A7 diff --git a/llvm/test/MC/ARM/directive-eabi_attribute.s b/llvm/test/MC/ARM/directive-eabi_attribute.s --- a/llvm/test/MC/ARM/directive-eabi_attribute.s +++ b/llvm/test/MC/ARM/directive-eabi_attribute.s @@ -234,13 +234,11 @@ @ CHECK-OBJ-NEXT: TagName: 
nodefaults @ CHECK-OBJ-NEXT: Description: Unspecified Tags UNDEFINED .eabi_attribute Tag_also_compatible_with, "\006\017" -@ The value for Tag_also_compatible_with should be a pair of a tag (ULEB128) + -@ a value (ULEB128 + null or NTBS). llvm-readobj doesn't now how to process -@ this yet, so we use the encoded value explicitly here. @ CHECK: .eabi_attribute 65, "\006\017" @ CHECK-OBJ: Tag: 65 @ CHECK-OBJ-NEXT: TagName: also_compatible_with -@ CHECK-OBJ-NEXT: Value: +@ CHECK-OBJ-NEXT: Value: \006\017 +@ CHECK-OBJ-NEXT: Description: Tag_CPU_arch = 15 (ARM v8-R) .eabi_attribute Tag_T2EE_use, 0 @ CHECK: .eabi_attribute 66, 0 @ CHECK-OBJ: Tag: 66 diff --git a/llvm/test/MC/ELF/debug-hash-file.s b/llvm/test/MC/ELF/debug-hash-file.s --- a/llvm/test/MC/ELF/debug-hash-file.s +++ b/llvm/test/MC/ELF/debug-hash-file.s @@ -23,6 +23,26 @@ // DWARF5-NEXT: dir_index: 0 // DWARF5-NOT: file_names[ 1]: +// RUN: llvm-mc -triple=x86_64 -filetype=obj -g -dwarf-version=4 -fdebug-prefix-map=/MyTest=/src_root %s -o %t.4.o +// RUN: llvm-dwarfdump -debug-info -debug-line %t.4.o | FileCheck %s --check-prefixes=MAP,MAP_V4 +// RUN: llvm-mc -triple=x86_64 -filetype=obj -g -dwarf-version=5 -fdebug-prefix-map=/MyTest=/src_root %s -o %t.5.o +// RUN: llvm-dwarfdump -debug-info -debug-line %t.5.o | FileCheck %s --check-prefixes=MAP,MAP_V5 + +// MAP-LABEL: DW_TAG_compile_unit +// MAP: DW_AT_name ("/src_root/Inputs{{(/|\\)+}}other.S") +// MAP-LABEL: DW_TAG_label +// MAP: DW_AT_decl_file ("/src_root/Inputs{{(/|\\)+}}other.S") + +// MAP_V4: include_directories[ 1] = "/src_root/Inputs" +// MAP_V4-NEXT: file_names[ 1]: +// MAP_V4-NEXT: name: "other.S" +// MAP_V4-NEXT: dir_index: 1 + +// MAP_V5: include_directories[ 0] = "{{.*}}" +// MAP_V5-NEXT: file_names[ 0]: +// MAP_V5-NEXT: name: "/src_root/Inputs/other.S" +// MAP_V5-NEXT: dir_index: 0 + # 1 "/MyTest/Inputs/other.S" foo: diff --git a/llvm/test/Transforms/EarlyCSE/nonmixed-strictfp.ll b/llvm/test/Transforms/EarlyCSE/nonmixed-strictfp.ll --- 
a/llvm/test/Transforms/EarlyCSE/nonmixed-strictfp.ll +++ b/llvm/test/Transforms/EarlyCSE/nonmixed-strictfp.ll @@ -22,9 +22,8 @@ define double @fadd_neginf(double %a, double %b) #0 { ; CHECK-LABEL: @fadd_neginf( ; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[A:%.*]], double [[B:%.*]], metadata !"round.downward", metadata !"fpexcept.ignore") #[[ATTR0]] -; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[A]], double [[B]], metadata !"round.downward", metadata !"fpexcept.ignore") #[[ATTR0]] -; CHECK-NEXT: [[TMP3:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP2]]) #[[ATTR0]] -; CHECK-NEXT: ret double [[TMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP1]]) #[[ATTR0]] +; CHECK-NEXT: ret double [[TMP1]] ; %1 = call double @llvm.experimental.constrained.fadd.f64(double %a, double %b, metadata !"round.downward", metadata !"fpexcept.ignore") #0 %2 = call double @llvm.experimental.constrained.fadd.f64(double %a, double %b, metadata !"round.downward", metadata !"fpexcept.ignore") #0 @@ -35,9 +34,8 @@ define double @fadd_maytrap(double %a, double %b) #0 { ; CHECK-LABEL: @fadd_maytrap( ; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[A:%.*]], double [[B:%.*]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]] -; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[A]], double [[B]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]] -; CHECK-NEXT: [[TMP3:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP2]]) #[[ATTR0]] -; CHECK-NEXT: ret double [[TMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP1]]) #[[ATTR0]] +; CHECK-NEXT: ret double [[TMP1]] ; %1 = call double @llvm.experimental.constrained.fadd.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0 %2 = 
call double @llvm.experimental.constrained.fadd.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0 @@ -60,9 +58,8 @@ define double @fsub_neginf(double %a, double %b) #0 { ; CHECK-LABEL: @fsub_neginf( ; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.fsub.f64(double [[A:%.*]], double [[B:%.*]], metadata !"round.downward", metadata !"fpexcept.ignore") #[[ATTR0]] -; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.fsub.f64(double [[A]], double [[B]], metadata !"round.downward", metadata !"fpexcept.ignore") #[[ATTR0]] -; CHECK-NEXT: [[TMP3:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP2]]) #[[ATTR0]] -; CHECK-NEXT: ret double [[TMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP1]]) #[[ATTR0]] +; CHECK-NEXT: ret double [[TMP1]] ; %1 = call double @llvm.experimental.constrained.fsub.f64(double %a, double %b, metadata !"round.downward", metadata !"fpexcept.ignore") #0 %2 = call double @llvm.experimental.constrained.fsub.f64(double %a, double %b, metadata !"round.downward", metadata !"fpexcept.ignore") #0 @@ -73,9 +70,8 @@ define double @fsub_maytrap(double %a, double %b) #0 { ; CHECK-LABEL: @fsub_maytrap( ; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.fsub.f64(double [[A:%.*]], double [[B:%.*]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]] -; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.fsub.f64(double [[A]], double [[B]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]] -; CHECK-NEXT: [[TMP3:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP2]]) #[[ATTR0]] -; CHECK-NEXT: ret double [[TMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP1]]) #[[ATTR0]] +; CHECK-NEXT: ret double [[TMP1]] ; %1 = call double @llvm.experimental.constrained.fsub.f64(double %a, double %b, metadata !"round.tonearest", metadata 
!"fpexcept.maytrap") #0 %2 = call double @llvm.experimental.constrained.fsub.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0 @@ -98,9 +94,8 @@ define double @fmul_neginf(double %a, double %b) #0 { ; CHECK-LABEL: @fmul_neginf( ; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[A:%.*]], double [[B:%.*]], metadata !"round.downward", metadata !"fpexcept.ignore") #[[ATTR0]] -; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[A]], double [[B]], metadata !"round.downward", metadata !"fpexcept.ignore") #[[ATTR0]] -; CHECK-NEXT: [[TMP3:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP2]]) #[[ATTR0]] -; CHECK-NEXT: ret double [[TMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP1]]) #[[ATTR0]] +; CHECK-NEXT: ret double [[TMP1]] ; %1 = call double @llvm.experimental.constrained.fmul.f64(double %a, double %b, metadata !"round.downward", metadata !"fpexcept.ignore") #0 %2 = call double @llvm.experimental.constrained.fmul.f64(double %a, double %b, metadata !"round.downward", metadata !"fpexcept.ignore") #0 @@ -110,9 +105,8 @@ define double @fmul_maytrap(double %a, double %b) #0 { ; CHECK-LABEL: @fmul_maytrap( ; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[A:%.*]], double [[B:%.*]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]] -; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[A]], double [[B]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]] -; CHECK-NEXT: [[TMP3:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP2]]) #[[ATTR0]] -; CHECK-NEXT: ret double [[TMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP1]]) #[[ATTR0]] +; CHECK-NEXT: ret double [[TMP1]] ; %1 = call double @llvm.experimental.constrained.fmul.f64(double %a, double %b, metadata 
!"round.tonearest", metadata !"fpexcept.maytrap") #0 %2 = call double @llvm.experimental.constrained.fmul.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0 @@ -135,9 +129,8 @@ define double @fdiv_neginf(double %a, double %b) #0 { ; CHECK-LABEL: @fdiv_neginf( ; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[A:%.*]], double [[B:%.*]], metadata !"round.downward", metadata !"fpexcept.ignore") #[[ATTR0]] -; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[A]], double [[B]], metadata !"round.downward", metadata !"fpexcept.ignore") #[[ATTR0]] -; CHECK-NEXT: [[TMP3:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP2]]) #[[ATTR0]] -; CHECK-NEXT: ret double [[TMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP1]]) #[[ATTR0]] +; CHECK-NEXT: ret double [[TMP1]] ; %1 = call double @llvm.experimental.constrained.fdiv.f64(double %a, double %b, metadata !"round.downward", metadata !"fpexcept.ignore") #0 %2 = call double @llvm.experimental.constrained.fdiv.f64(double %a, double %b, metadata !"round.downward", metadata !"fpexcept.ignore") #0 @@ -148,9 +141,8 @@ define double @fdiv_maytrap(double %a, double %b) #0 { ; CHECK-LABEL: @fdiv_maytrap( ; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[A:%.*]], double [[B:%.*]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]] -; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[A]], double [[B]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]] -; CHECK-NEXT: [[TMP3:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP2]]) #[[ATTR0]] -; CHECK-NEXT: ret double [[TMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP1]]) #[[ATTR0]] +; CHECK-NEXT: ret double [[TMP1]] ; %1 = call double 
@llvm.experimental.constrained.fdiv.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0 %2 = call double @llvm.experimental.constrained.fdiv.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0 @@ -173,9 +165,8 @@ define double @frem_neginf(double %a, double %b) #0 { ; CHECK-LABEL: @frem_neginf( ; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.frem.f64(double [[A:%.*]], double [[B:%.*]], metadata !"round.downward", metadata !"fpexcept.ignore") #[[ATTR0]] -; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.frem.f64(double [[A]], double [[B]], metadata !"round.downward", metadata !"fpexcept.ignore") #[[ATTR0]] -; CHECK-NEXT: [[TMP3:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP2]]) #[[ATTR0]] -; CHECK-NEXT: ret double [[TMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP1]]) #[[ATTR0]] +; CHECK-NEXT: ret double [[TMP1]] ; %1 = call double @llvm.experimental.constrained.frem.f64(double %a, double %b, metadata !"round.downward", metadata !"fpexcept.ignore") #0 %2 = call double @llvm.experimental.constrained.frem.f64(double %a, double %b, metadata !"round.downward", metadata !"fpexcept.ignore") #0 @@ -186,9 +177,8 @@ define double @frem_maytrap(double %a, double %b) #0 { ; CHECK-LABEL: @frem_maytrap( ; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.frem.f64(double [[A:%.*]], double [[B:%.*]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]] -; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.frem.f64(double [[A]], double [[B]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]] -; CHECK-NEXT: [[TMP3:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP2]]) #[[ATTR0]] -; CHECK-NEXT: ret double [[TMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP1]]) #[[ATTR0]] +; CHECK-NEXT: ret double 
[[TMP1]] ; %1 = call double @llvm.experimental.constrained.frem.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0 %2 = call double @llvm.experimental.constrained.frem.f64(double %a, double %b, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0 @@ -211,9 +201,8 @@ define i32 @fptoui_maytrap(double %a) #0 { ; CHECK-LABEL: @fptoui_maytrap( ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double [[A:%.*]], metadata !"fpexcept.maytrap") #[[ATTR0]] -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double [[A]], metadata !"fpexcept.maytrap") #[[ATTR0]] -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @bar.i32(i32 [[TMP1]], i32 [[TMP1]]) #[[ATTR0]] -; CHECK-NEXT: ret i32 [[TMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @bar.i32(i32 [[TMP1]], i32 [[TMP1]]) #[[ATTR0]] +; CHECK-NEXT: ret i32 [[TMP1]] ; %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %a, metadata !"fpexcept.maytrap") #0 %2 = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %a, metadata !"fpexcept.maytrap") #0 @@ -236,9 +225,8 @@ define double @uitofp_neginf(i32 %a) #0 { ; CHECK-LABEL: @uitofp_neginf( ; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[A:%.*]], metadata !"round.downward", metadata !"fpexcept.ignore") #[[ATTR0]] -; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[A]], metadata !"round.downward", metadata !"fpexcept.ignore") #[[ATTR0]] -; CHECK-NEXT: [[TMP3:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP1]]) #[[ATTR0]] -; CHECK-NEXT: ret double [[TMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP1]]) #[[ATTR0]] +; CHECK-NEXT: ret double [[TMP1]] ; %1 = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata !"round.downward", metadata !"fpexcept.ignore") #0 %2 = call double 
@llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata !"round.downward", metadata !"fpexcept.ignore") #0 @@ -249,9 +237,8 @@ define double @uitofp_maytrap(i32 %a) #0 { ; CHECK-LABEL: @uitofp_maytrap( ; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]] -; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[A]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]] -; CHECK-NEXT: [[TMP3:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP1]]) #[[ATTR0]] -; CHECK-NEXT: ret double [[TMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP1]]) #[[ATTR0]] +; CHECK-NEXT: ret double [[TMP1]] ; %1 = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0 %2 = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0 @@ -274,9 +261,8 @@ define i32 @fptosi_maytrap(double %a) #0 { ; CHECK-LABEL: @fptosi_maytrap( ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double [[A:%.*]], metadata !"fpexcept.maytrap") #[[ATTR0]] -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double [[A]], metadata !"fpexcept.maytrap") #[[ATTR0]] -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @bar.i32(i32 [[TMP1]], i32 [[TMP1]]) #[[ATTR0]] -; CHECK-NEXT: ret i32 [[TMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @bar.i32(i32 [[TMP1]], i32 [[TMP1]]) #[[ATTR0]] +; CHECK-NEXT: ret i32 [[TMP1]] ; %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %a, metadata !"fpexcept.maytrap") #0 %2 = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %a, metadata !"fpexcept.maytrap") #0 @@ -299,9 +285,8 @@ define double @sitofp_neginf(i32 %a) #0 { ; CHECK-LABEL: @sitofp_neginf( 
; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 [[A:%.*]], metadata !"round.downward", metadata !"fpexcept.ignore") #[[ATTR0]] -; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 [[A]], metadata !"round.downward", metadata !"fpexcept.ignore") #[[ATTR0]] -; CHECK-NEXT: [[TMP3:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP1]]) #[[ATTR0]] -; CHECK-NEXT: ret double [[TMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP1]]) #[[ATTR0]] +; CHECK-NEXT: ret double [[TMP1]] ; %1 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata !"round.downward", metadata !"fpexcept.ignore") #0 %2 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata !"round.downward", metadata !"fpexcept.ignore") #0 @@ -312,9 +297,8 @@ define double @sitofp_maytrap(i32 %a) #0 { ; CHECK-LABEL: @sitofp_maytrap( ; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]] -; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 [[A]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]] -; CHECK-NEXT: [[TMP3:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP1]]) #[[ATTR0]] -; CHECK-NEXT: ret double [[TMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = call double @foo.f64(double [[TMP1]], double [[TMP1]]) #[[ATTR0]] +; CHECK-NEXT: ret double [[TMP1]] ; %1 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0 %2 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0 @@ -340,11 +324,9 @@ define i1 @fcmp_maytrap(double %a, double %b) #0 { ; CHECK-LABEL: @fcmp_maytrap( ; CHECK-NEXT: [[TMP1:%.*]] = call i1 
@llvm.experimental.constrained.fcmp.f64(double [[A:%.*]], double [[B:%.*]], metadata !"oeq", metadata !"fpexcept.maytrap") #[[ATTR0]] -; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[A]], double [[B]], metadata !"oeq", metadata !"fpexcept.maytrap") #[[ATTR0]] -; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP1]] to i32 -; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP2]] to i32 -; CHECK-NEXT: [[TMP5:%.*]] = call i32 @bar.i32(i32 [[TMP3]], i32 [[TMP4]]) #[[ATTR0]] -; CHECK-NEXT: ret i1 [[TMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @bar.i32(i32 [[TMP2]], i32 [[TMP2]]) #[[ATTR0]] +; CHECK-NEXT: ret i1 [[TMP1]] ; %1 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.maytrap") #0 %2 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.maytrap") #0 @@ -372,11 +354,9 @@ define i1 @fcmps_maytrap(double %a, double %b) #0 { ; CHECK-LABEL: @fcmps_maytrap( ; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double [[A:%.*]], double [[B:%.*]], metadata !"oeq", metadata !"fpexcept.maytrap") #[[ATTR0]] -; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double [[A]], double [[B]], metadata !"oeq", metadata !"fpexcept.maytrap") #[[ATTR0]] -; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP1]] to i32 -; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP2]] to i32 -; CHECK-NEXT: [[TMP5:%.*]] = call i32 @bar.i32(i32 [[TMP3]], i32 [[TMP4]]) #[[ATTR0]] -; CHECK-NEXT: ret i1 [[TMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @bar.i32(i32 [[TMP2]], i32 [[TMP2]]) #[[ATTR0]] +; CHECK-NEXT: ret i1 [[TMP1]] ; %1 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.maytrap") #0 %2 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double 
%b, metadata !"oeq", metadata !"fpexcept.maytrap") #0 diff --git a/llvm/test/Transforms/EarlyCSE/tfpropagation.ll b/llvm/test/Transforms/EarlyCSE/tfpropagation.ll --- a/llvm/test/Transforms/EarlyCSE/tfpropagation.ll +++ b/llvm/test/Transforms/EarlyCSE/tfpropagation.ll @@ -128,10 +128,10 @@ ; CHECK-NEXT: [[CMP2:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double 1.000000e+00, double [[CONV1]], metadata !"ogt", metadata !"fpexcept.maytrap") #[[ATTR0]] ; CHECK-NEXT: br i1 [[CMP2]], label [[IF_THEN3:%.*]], label [[IF_END3:%.*]] ; CHECK: if.then3: -; CHECK-NEXT: [[C:%.*]] = call double @truefunc.f64.i1(i1 [[CMP2]]) #[[ATTR0]] +; CHECK-NEXT: [[C:%.*]] = call double @truefunc.f64.i1(i1 true) #[[ATTR0]] ; CHECK-NEXT: br label [[OUT:%.*]] ; CHECK: if.end3: -; CHECK-NEXT: [[D:%.*]] = call double @falsefunc.f64.i1(i1 [[CMP2]]) #[[ATTR0]] +; CHECK-NEXT: [[D:%.*]] = call double @falsefunc.f64.i1(i1 false) #[[ATTR0]] ; CHECK-NEXT: br label [[OUT]] ; CHECK: out: ; CHECK-NEXT: ret double [[CONV1]] @@ -152,6 +152,38 @@ ret double %conv1 } +; TODO: Fix this optimization so it works with strict exception behavior. +; TODO: This may or may not be worth the added complication and risk. 
+define double @branching_ebstrict(i64 %a) #0 { +; CHECK-LABEL: @branching_ebstrict( +; CHECK-NEXT: [[CONV1:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i64(i64 [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]] +; CHECK-NEXT: [[CMP2:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double 1.000000e+00, double [[CONV1]], metadata !"ogt", metadata !"fpexcept.strict") #[[ATTR0]] +; CHECK-NEXT: br i1 [[CMP2]], label [[IF_THEN3:%.*]], label [[IF_END3:%.*]] +; CHECK: if.then3: +; CHECK-NEXT: [[C:%.*]] = call double @truefunc.f64.i1(i1 [[CMP2]]) #[[ATTR0]] +; CHECK-NEXT: br label [[OUT:%.*]] +; CHECK: if.end3: +; CHECK-NEXT: [[D:%.*]] = call double @falsefunc.f64.i1(i1 [[CMP2]]) #[[ATTR0]] +; CHECK-NEXT: br label [[OUT]] +; CHECK: out: +; CHECK-NEXT: ret double [[CONV1]] +; + %conv1 = call double @llvm.experimental.constrained.uitofp.f64.i64(i64 %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + %cmp2 = call i1 @llvm.experimental.constrained.fcmps.f64(double 1.000000e+00, double %conv1, metadata !"ogt", metadata !"fpexcept.strict") #0 + br i1 %cmp2, label %if.then3, label %if.end3 + +if.then3: + %c = call double @truefunc.f64.i1(i1 %cmp2) #0 + br label %out + +if.end3: + %d = call double @falsefunc.f64.i1(i1 %cmp2) #0 + br label %out + +out: + ret double %conv1 +} + declare double @truefunc.f64.i1(i1) declare double @falsefunc.f64.i1(i1) declare double @llvm.experimental.constrained.uitofp.f64.i64(i64, metadata, metadata) #0 diff --git a/llvm/test/Transforms/InstCombine/ffs-i16.ll b/llvm/test/Transforms/InstCombine/ffs-i16.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/ffs-i16.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; +; Test that the ffs library call simplifier works correctly even for +; targets with 16-bit int. 
+; +; RUN: opt < %s -mtriple=avr-linux -passes=instcombine -S | FileCheck %s +; RUN: opt < %s -mtriple=msp430-linux -passes=instcombine -S | FileCheck %s + +declare i16 @ffs(i16) + +declare void @sink(i16) + + +define void @fold_ffs(i16 %x) { +; CHECK-LABEL: @fold_ffs( +; CHECK-NEXT: call void @sink(i16 0) +; CHECK-NEXT: call void @sink(i16 1) +; CHECK-NEXT: [[CTTZ:%.*]] = call i16 @llvm.cttz.i16(i16 [[X:%.*]], i1 true), !range [[RNG0:![0-9]+]] +; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i16 [[CTTZ]], 1 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i16 [[X]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[DOTNOT]], i16 0, i16 [[TMP1]] +; CHECK-NEXT: call void @sink(i16 [[TMP2]]) +; CHECK-NEXT: ret void +; + %n0 = call i16 @ffs(i16 0) + call void @sink(i16 %n0) + + %n1 = call i16 @ffs(i16 1) + call void @sink(i16 %n1) + + %nx = call i16 @ffs(i16 %x) + call void @sink(i16 %nx) + + ret void +} diff --git a/llvm/test/Transforms/InstCombine/fls-i16.ll b/llvm/test/Transforms/InstCombine/fls-i16.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/fls-i16.ll @@ -0,0 +1,34 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; +; Test that the fls library call simplifier works correctly even for +; targets with 16-bit int. Although fls is available on a number of +; targets it's supported (hardcoded as available) only on FreeBSD. 
+; +; RUN: opt < %s -mtriple=avr-freebsd -passes=instcombine -S | FileCheck %s +; RUN: opt < %s -mtriple=msp430-freebsd -passes=instcombine -S | FileCheck %s + +declare i16 @fls(i16) + +declare void @sink(i16) + + +define void @fold_fls(i16 %x) { +; CHECK-LABEL: @fold_fls( +; CHECK-NEXT: call void @sink(i16 0) +; CHECK-NEXT: call void @sink(i16 1) +; CHECK-NEXT: [[CTLZ:%.*]] = call i16 @llvm.ctlz.i16(i16 [[X:%.*]], i1 false), !range [[RNG0:![0-9]+]] +; CHECK-NEXT: [[TMP1:%.*]] = sub nuw nsw i16 16, [[CTLZ]] +; CHECK-NEXT: call void @sink(i16 [[TMP1]]) +; CHECK-NEXT: ret void +; + %n0 = call i16 @fls(i16 0) + call void @sink(i16 %n0) + + %n1 = call i16 @fls(i16 1) + call void @sink(i16 %n1) + + %nx = call i16 @fls(i16 %x) + call void @sink(i16 %nx) + + ret void +} diff --git a/llvm/test/Transforms/InstCombine/fortify-folding.ll b/llvm/test/Transforms/InstCombine/fortify-folding.ll --- a/llvm/test/Transforms/InstCombine/fortify-folding.ll +++ b/llvm/test/Transforms/InstCombine/fortify-folding.ll @@ -246,7 +246,7 @@ define i64 @test_strlcpy() { ; CHECK-LABEL: @test_strlcpy( -; CHECK-NEXT: [[STRLCPY:%.*]] = call i64 @strlcpy(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 22) +; CHECK-NEXT: [[STRLCPY:%.*]] = call i64 @strlcpy(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 22) ; CHECK-NEXT: ret i64 [[STRLCPY]] ; %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 @@ -268,7 +268,7 @@ define i64 @test_strlcpy_tail() { ; CHECK-LABEL: @test_strlcpy_tail( -; CHECK-NEXT: [[STRLCPY:%.*]] = tail call i64 @strlcpy(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 22) +; CHECK-NEXT: [[STRLCPY:%.*]] = tail call i64 
@strlcpy(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 22) ; CHECK-NEXT: ret i64 [[STRLCPY]] ; %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 diff --git a/llvm/test/Transforms/InstCombine/isascii-i16.ll b/llvm/test/Transforms/InstCombine/isascii-i16.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/isascii-i16.ll @@ -0,0 +1,57 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; Test that the isascii library call simplifier works correctly even for +; targets with 16-bit int. +; +; RUN: opt < %s -mtriple=avr-freebsd -passes=instcombine -S | FileCheck %s +; RUN: opt < %s -mtriple=msp430-linux -passes=instcombine -S | FileCheck %s + +declare i16 @isascii(i16) + +declare void @sink(i16) + + +define void @fold_isascii(i16 %c) { +; CHECK-LABEL: @fold_isascii( +; CHECK-NEXT: call void @sink(i16 1) +; CHECK-NEXT: call void @sink(i16 1) +; CHECK-NEXT: call void @sink(i16 1) +; CHECK-NEXT: call void @sink(i16 0) +; CHECK-NEXT: call void @sink(i16 0) +; CHECK-NEXT: call void @sink(i16 0) +; CHECK-NEXT: call void @sink(i16 0) +; CHECK-NEXT: call void @sink(i16 0) +; CHECK-NEXT: [[ISASCII:%.*]] = icmp ult i16 [[C:%.*]], 128 +; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[ISASCII]] to i16 +; CHECK-NEXT: call void @sink(i16 [[TMP1]]) +; CHECK-NEXT: ret void +; + %i0 = call i16 @isascii(i16 0) + call void @sink(i16 %i0) + + %i1 = call i16 @isascii(i16 1) + call void @sink(i16 %i1) + + %i127 = call i16 @isascii(i16 127) + call void @sink(i16 %i127) + + %i128 = call i16 @isascii(i16 128) + call void @sink(i16 %i128) + + %i255 = call i16 @isascii(i16 255) + call void @sink(i16 %i255) + + %i256 = call i16 @isascii(i16 256) + call void @sink(i16 %i256) + + ; Fold isascii(INT_MAX) to 0. The call is valid with all int values. 
+ %imax = call i16 @isascii(i16 32767) + call void @sink(i16 %imax) + + %uimax = call i16 @isascii(i16 65535) + call void @sink(i16 %uimax) + + %ic = call i16 @isascii(i16 %c) + call void @sink(i16 %ic) + + ret void +} diff --git a/llvm/test/Transforms/InstCombine/isdigit-i16.ll b/llvm/test/Transforms/InstCombine/isdigit-i16.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/isdigit-i16.ll @@ -0,0 +1,82 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; Test that the isdigit library call simplifier works correctly even for +; targets with 16-bit int. +; +; RUN: opt < %s -mtriple=avr-linux -passes=instcombine -S | FileCheck %s +; RUN: opt < %s -mtriple=msp430-freebsd -passes=instcombine -S | FileCheck %s + +declare i16 @isdigit(i16) + +declare void @sink(i16) + +define void @fold_isdigit(i16 %c) { +; CHECK-LABEL: @fold_isdigit( +; CHECK-NEXT: call void @sink(i16 0) +; CHECK-NEXT: call void @sink(i16 0) +; CHECK-NEXT: call void @sink(i16 0) +; CHECK-NEXT: call void @sink(i16 1) +; CHECK-NEXT: call void @sink(i16 1) +; CHECK-NEXT: call void @sink(i16 1) +; CHECK-NEXT: call void @sink(i16 0) +; CHECK-NEXT: call void @sink(i16 0) +; CHECK-NEXT: call void @sink(i16 0) +; CHECK-NEXT: call void @sink(i16 0) +; CHECK-NEXT: call void @sink(i16 0) +; CHECK-NEXT: call void @sink(i16 0) +; CHECK-NEXT: [[ISDIGITTMP:%.*]] = add i16 [[C:%.*]], -48 +; CHECK-NEXT: [[ISDIGIT:%.*]] = icmp ult i16 [[ISDIGITTMP]], 10 +; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[ISDIGIT]] to i16 +; CHECK-NEXT: call void @sink(i16 [[TMP1]]) +; CHECK-NEXT: ret void +; + %i0 = call i16 @isdigit(i16 0) + call void @sink(i16 %i0) + + %i1 = call i16 @isdigit(i16 1) + call void @sink(i16 %i1) + + ; Fold isdigit('/') to 0. + %i47 = call i16 @isdigit(i16 47) + call void @sink(i16 %i47) + +; Fold isdigit('0') to 1. + %i48 = call i16 @isdigit(i16 48) + call void @sink(i16 %i48) + + ; Fold isdigit('1') to 1. 
+ %i49 = call i16 @isdigit(i16 49) + call void @sink(i16 %i49) + + ; Fold isdigit('9') to 1. + %i57 = call i16 @isdigit(i16 57) + call void @sink(i16 %i57) + + ; Fold isdigit(':') to 0. + %i58 = call i16 @isdigit(i16 58) + call void @sink(i16 %i58) + + %i127 = call i16 @isdigit(i16 127) + call void @sink(i16 %i127) + + %i128 = call i16 @isdigit(i16 128) + call void @sink(i16 %i128) + + %i255 = call i16 @isdigit(i16 255) + call void @sink(i16 %i255) + + ; Fold isdigit(256) to 0. The argument is required to be representable + ; in unsigned char but it's a common mistake to call the function with + ; other arguments and it's arguably safer to fold such calls than to + ; let the library call return an arbitrary value or crash. + %i256 = call i16 @isdigit(i16 256) + call void @sink(i16 %i256) + + ; Same as above. + %imax = call i16 @isdigit(i16 32767) + call void @sink(i16 %imax) + + %ic = call i16 @isdigit(i16 %c) + call void @sink(i16 %ic) + + ret void +} diff --git a/llvm/test/Transforms/InstCombine/known-phi-br.ll b/llvm/test/Transforms/InstCombine/known-phi-br.ll --- a/llvm/test/Transforms/InstCombine/known-phi-br.ll +++ b/llvm/test/Transforms/InstCombine/known-phi-br.ll @@ -6,7 +6,7 @@ ; the known bits of a phi edge based off a conditional branch feeding the phi. 
; -; TODO: %x either eq 7 or is set to 7 +; %x either eq 7 or is set to 7 define i64 @limit_i64_eq_7(i64 %x) { ; CHECK-LABEL: @limit_i64_eq_7( ; CHECK-NEXT: entry: @@ -15,8 +15,7 @@ ; CHECK: body: ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[X]], [[ENTRY:%.*]] ], [ 7, [[BODY]] ] -; CHECK-NEXT: ret i64 [[RES]] +; CHECK-NEXT: ret i64 7 ; entry: %cmp = icmp eq i64 %x, 7 @@ -28,20 +27,21 @@ ret i64 %res } -; TODO: %x either eq 255 or is set to 255 +; %x either eq 255 or is set to 255 define i64 @limit_i64_ne_255(i64 %x) { ; CHECK-LABEL: @limit_i64_ne_255( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[X:%.*]], 255 -; CHECK-NEXT: br i1 [[CMP_NOT]], label [[END:%.*]], label [[BODY:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[X:%.*]], 255 +; CHECK-NEXT: call void @use(i1 [[CMP]]) +; CHECK-NEXT: br i1 [[CMP]], label [[BODY:%.*]], label [[END:%.*]] ; CHECK: body: ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[X]], [[ENTRY:%.*]] ], [ 255, [[BODY]] ] -; CHECK-NEXT: ret i64 [[RES]] +; CHECK-NEXT: ret i64 255 ; entry: %cmp = icmp ne i64 %x, 255 + call void @use(i1 %cmp) br i1 %cmp, label %body, label %end body: br label %end @@ -49,8 +49,9 @@ %res = phi i64 [ %x, %entry ], [ 255, %body ] ret i64 %res } +declare void @use(i1) -; TODO: %x either ule 15 or is masked with 15 +; %x either ule 15 or is masked with 15 define i64 @limit_i64_ule_15(i64 %x) { ; CHECK-LABEL: @limit_i64_ule_15( ; CHECK-NEXT: entry: @@ -61,8 +62,7 @@ ; CHECK-NEXT: br label [[END]] ; CHECK: end: ; CHECK-NEXT: [[X_MASK:%.*]] = phi i64 [ [[X]], [[ENTRY:%.*]] ], [ [[MASK]], [[BODY]] ] -; CHECK-NEXT: [[RES:%.*]] = and i64 [[X_MASK]], 15 -; CHECK-NEXT: ret i64 [[RES]] +; CHECK-NEXT: ret i64 [[X_MASK]] ; entry: %cmp = icmp ule i64 %x, 15 @@ -76,7 +76,7 @@ ret i64 %res } -; TODO: %x either uge 8 or is masked with 7 +; %x either uge 8 or is masked with 7 define i64 @limit_i64_uge_8(i64 %x) { ; CHECK-LABEL: 
@limit_i64_uge_8( ; CHECK-NEXT: entry: @@ -87,8 +87,7 @@ ; CHECK-NEXT: br label [[END]] ; CHECK: end: ; CHECK-NEXT: [[X_MASK:%.*]] = phi i64 [ [[X]], [[ENTRY:%.*]] ], [ [[MASK]], [[BODY]] ] -; CHECK-NEXT: [[RES:%.*]] = and i64 [[X_MASK]], 7 -; CHECK-NEXT: ret i64 [[RES]] +; CHECK-NEXT: ret i64 [[X_MASK]] ; entry: %cmp = icmp uge i64 %x, 8 @@ -102,7 +101,7 @@ ret i64 %res } -; TODO: %x either ult 8 or is masked with 7 +; %x either ult 8 or is masked with 7 define i64 @limit_i64_ult_8(i64 %x) { ; CHECK-LABEL: @limit_i64_ult_8( ; CHECK-NEXT: entry: @@ -113,8 +112,7 @@ ; CHECK-NEXT: br label [[END]] ; CHECK: end: ; CHECK-NEXT: [[X_MASK:%.*]] = phi i64 [ [[X]], [[ENTRY:%.*]] ], [ [[MASK]], [[BODY]] ] -; CHECK-NEXT: [[RES:%.*]] = and i64 [[X_MASK]], 7 -; CHECK-NEXT: ret i64 [[RES]] +; CHECK-NEXT: ret i64 [[X_MASK]] ; entry: %cmp = icmp ult i64 %x, 8 @@ -128,7 +126,7 @@ ret i64 %res } -; TODO: %x either ugt 7 or is masked with 7 +; %x either ugt 7 or is masked with 7 define i64 @limit_i64_ugt_7(i64 %x) { ; CHECK-LABEL: @limit_i64_ugt_7( ; CHECK-NEXT: entry: @@ -139,8 +137,7 @@ ; CHECK-NEXT: br label [[END]] ; CHECK: end: ; CHECK-NEXT: [[X_MASK:%.*]] = phi i64 [ [[X]], [[ENTRY:%.*]] ], [ [[MASK]], [[BODY]] ] -; CHECK-NEXT: [[RES:%.*]] = and i64 [[X_MASK]], 7 -; CHECK-NEXT: ret i64 [[RES]] +; CHECK-NEXT: ret i64 [[X_MASK]] ; entry: %cmp = icmp ugt i64 %x, 7 @@ -154,4 +151,58 @@ ret i64 %res } +; +; negative tests +; + +; %x either ule 15 or is masked with 15 +define i64 @limit_i64_ule_15_mask3(i64 %x) { +; CHECK-LABEL: @limit_i64_ule_15_mask3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[X:%.*]], 16 +; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[BODY:%.*]] +; CHECK: body: +; CHECK-NEXT: [[MASK:%.*]] = and i64 [[X]], 15 +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[X_MASK:%.*]] = phi i64 [ [[X]], [[ENTRY:%.*]] ], [ [[MASK]], [[BODY]] ] +; CHECK-NEXT: [[RES:%.*]] = and i64 [[X_MASK]], 3 +; CHECK-NEXT: ret i64 [[RES]] +; +entry: 
+ %cmp = icmp ule i64 %x, 15 + br i1 %cmp, label %end, label %body +body: + %mask = and i64 %x, 15 + br label %end +end: + %x.mask = phi i64 [ %x, %entry ], [ %mask, %body ] + %res = and i64 %x.mask, 3 + ret i64 %res +} +; %x either ult 8 or is masked with 7 +define i64 @limit_i64_ult_8_mask1(i64 %x) { +; CHECK-LABEL: @limit_i64_ult_8_mask1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[X:%.*]], 8 +; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[BODY:%.*]] +; CHECK: body: +; CHECK-NEXT: [[MASK:%.*]] = and i64 [[X]], 7 +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[X_MASK:%.*]] = phi i64 [ [[X]], [[ENTRY:%.*]] ], [ [[MASK]], [[BODY]] ] +; CHECK-NEXT: [[RES:%.*]] = and i64 [[X_MASK]], 1 +; CHECK-NEXT: ret i64 [[RES]] +; +entry: + %cmp = icmp ult i64 %x, 8 + br i1 %cmp, label %end, label %body +body: + %mask = and i64 %x, 7 + br label %end +end: + %x.mask = phi i64 [ %x, %entry ], [ %mask, %body ] + %res = and i64 %x.mask, 1 + ret i64 %res +} diff --git a/llvm/test/Transforms/InstCombine/printf-i16.ll b/llvm/test/Transforms/InstCombine/printf-i16.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/printf-i16.ll @@ -0,0 +1,72 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; +; RUN: opt < %s -mtriple=avr-freebsd -passes=instcombine -S | FileCheck %s +; RUN: opt < %s -mtriple=msp430-linux -passes=instcombine -S | FileCheck %s +; +; Verify that the printf to putchar transformation works correctly even for +; targets with 16-bit int. + +declare i16 @putchar(i16) +declare i16 @puts(i8*) + +@s1 = constant [2 x i8] c"\01\00" +@s7f = constant [2 x i8] c"\7f\00" +@s80 = constant [2 x i8] c"\80\00" +@sff = constant [2 x i8] c"\ff\00" + +@pcnt_c = constant [3 x i8] c"%c\00" +@pcnt_s = constant [3 x i8] c"%s\00" + +declare i16 @printf(i8*, ...) + +; Verify that the three printf to putchar transformations all result +; in the same output for calls with equivalent arguments.
+ +define void @xform_printf(i8 %c8, i16 %c16) { +; CHECK-LABEL: @xform_printf( +; CHECK-NEXT: [[PUTCHAR:%.*]] = call i16 @putchar(i16 1) +; CHECK-NEXT: [[PUTCHAR1:%.*]] = call i16 @putchar(i16 1) +; CHECK-NEXT: [[PUTCHAR2:%.*]] = call i16 @putchar(i16 1) +; CHECK-NEXT: [[PUTCHAR3:%.*]] = call i16 @putchar(i16 127) +; CHECK-NEXT: [[PUTCHAR4:%.*]] = call i16 @putchar(i16 127) +; CHECK-NEXT: [[PUTCHAR5:%.*]] = call i16 @putchar(i16 127) +; CHECK-NEXT: [[PUTCHAR6:%.*]] = call i16 @putchar(i16 128) +; CHECK-NEXT: [[PUTCHAR7:%.*]] = call i16 @putchar(i16 128) +; CHECK-NEXT: [[PUTCHAR8:%.*]] = call i16 @putchar(i16 128) +; CHECK-NEXT: [[PUTCHAR9:%.*]] = call i16 @putchar(i16 255) +; CHECK-NEXT: [[PUTCHAR10:%.*]] = call i16 @putchar(i16 255) +; CHECK-NEXT: [[PUTCHAR11:%.*]] = call i16 @putchar(i16 255) +; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[C8:%.*]] to i16 +; CHECK-NEXT: [[PUTCHAR12:%.*]] = call i16 @putchar(i16 [[TMP1]]) +; CHECK-NEXT: [[PUTCHAR13:%.*]] = call i16 @putchar(i16 [[C16:%.*]]) +; CHECK-NEXT: ret void +; + %ppcnt_c = getelementptr [3 x i8], [3 x i8]* @pcnt_c, i32 0, i32 0 + %ppcnt_s = getelementptr [3 x i8], [3 x i8]* @pcnt_s, i32 0, i32 0 + + %ps1 = getelementptr [2 x i8], [2 x i8]* @s1, i32 0, i32 0 + call i16 (i8*, ...) @printf(i8* %ps1) + call i16 (i8*, ...) @printf(i8* %ppcnt_c, i16 1) + call i16 (i8*, ...) @printf(i8* %ppcnt_s, i8* %ps1) + + %ps7f = getelementptr [2 x i8], [2 x i8]* @s7f, i32 0, i32 0 + call i16 (i8*, ...) @printf(i8* %ps7f) + call i16 (i8*, ...) @printf(i8* %ppcnt_c, i16 127) + call i16 (i8*, ...) @printf(i8* %ppcnt_s, i8* %ps7f) + + %ps80 = getelementptr [2 x i8], [2 x i8]* @s80, i32 0, i32 0 + call i16 (i8*, ...) @printf(i8* %ps80) + call i16 (i8*, ...) @printf(i8* %ppcnt_c, i16 128) + call i16 (i8*, ...) @printf(i8* %ppcnt_s, i8* %ps80) + + %psff = getelementptr [2 x i8], [2 x i8]* @sff, i32 0, i32 0 + call i16 (i8*, ...) @printf(i8* %psff) + call i16 (i8*, ...) @printf(i8* %ppcnt_c, i16 255) + call i16 (i8*, ...) 
@printf(i8* %ppcnt_s, i8* %psff) + +; The i8 argument to printf can be either zero-extended or sign-extended +; when passed to putchar which then converts it to unsigned char. + call i16 (i8*, ...) @printf(i8* %ppcnt_c, i8 %c8) + call i16 (i8*, ...) @printf(i8* %ppcnt_c, i16 %c16) + ret void +} diff --git a/llvm/test/Transforms/InstCombine/puts-i16.ll b/llvm/test/Transforms/InstCombine/puts-i16.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/puts-i16.ll @@ -0,0 +1,24 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; +; RUN: opt < %s -mtriple=avr-linux -passes=instcombine -S | FileCheck %s +; RUN: opt < %s -mtriple=msp430-freebsd -passes=instcombine -S | FileCheck %s +; +; Test that the puts to putchar transformation works correctly even for +; targets with 16-bit int. + +declare i16 @putchar(i16) +declare i16 @puts(i8*) + +@empty = constant [1 x i8] c"\00" + +define void @xform_puts(i16 %c) { +; CHECK-LABEL: @xform_puts( +; CHECK-NEXT: [[PUTCHAR:%.*]] = call i16 @putchar(i16 10) +; CHECK-NEXT: ret void +; +; Transform puts("") to putchar("\n"). 
+ %s = getelementptr [1 x i8], [1 x i8]* @empty, i32 0, i32 0 + call i16 @puts(i8* %s) + + ret void +} diff --git a/llvm/test/Transforms/InstCombine/remove-loop-phi-fastmul.ll b/llvm/test/Transforms/InstCombine/remove-loop-phi-fastmul.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/remove-loop-phi-fastmul.ll @@ -0,0 +1,283 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=instcombine -S | FileCheck %s +define double @test_mul_fast_flags(ptr %arr_d) { +; CHECK-LABEL: @test_mul_fast_flags( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_02:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[F_PROD_01:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[MUL:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x double], ptr [[ARR_D:%.*]], i64 0, i64 [[I_02]] +; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[MUL]] = fmul fast double [[F_PROD_01]], [[TMP0]] +; CHECK-NEXT: [[INC]] = add i64 [[I_02]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[INC]], 1000 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[END:%.*]] +; CHECK: end: +; CHECK-NEXT: ret double [[MUL]] +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %f_prod.01 = phi double [ 0.000000e+00, %entry ], [ %mul, %for.body ] + %arrayidx = getelementptr inbounds [1000 x double], ptr %arr_d, i64 0, i64 %i.02 + %0 = load double, ptr %arrayidx, align 8 + %mul = fmul fast double %f_prod.01, %0 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 1000 + br i1 %cmp, label %for.body, label %end + +end: ; preds = %for.body + %f_prod.0.lcssa = phi double [ %mul, %for.body ] + ret double %f_prod.0.lcssa +} + +define double @test_nsz_nnan_flags_enabled(ptr %arr_d) { +; CHECK-LABEL: 
@test_nsz_nnan_flags_enabled( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_02:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[F_PROD_01:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[MUL:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x double], ptr [[ARR_D:%.*]], i64 0, i64 [[I_02]] +; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[MUL]] = fmul nnan nsz double [[F_PROD_01]], [[TMP0]] +; CHECK-NEXT: [[INC]] = add i64 [[I_02]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[INC]], 1000 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[END:%.*]] +; CHECK: end: +; CHECK-NEXT: ret double [[MUL]] +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %f_prod.01 = phi double [ 0.000000e+00, %entry ], [ %mul, %for.body ] + %arrayidx = getelementptr inbounds [1000 x double], ptr %arr_d, i64 0, i64 %i.02 + %0 = load double, ptr %arrayidx, align 8 + %mul = fmul nsz nnan double %f_prod.01, %0 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 1000 + br i1 %cmp, label %for.body, label %end + +end: ; preds = %for.body + %f_prod.0.lcssa = phi double [ %mul, %for.body ] + ret double %f_prod.0.lcssa +} + +define double @test_nnan_flag_enabled(ptr %arr_d) { +; CHECK-LABEL: @test_nnan_flag_enabled( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_02:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[F_PROD_01:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[MUL:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x double], ptr [[ARR_D:%.*]], i64 0, i64 [[I_02]] +; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[MUL]] = fmul nnan double [[F_PROD_01]], [[TMP0]] +; CHECK-NEXT: 
[[INC]] = add i64 [[I_02]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[INC]], 1000 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[END:%.*]] +; CHECK: end: +; CHECK-NEXT: ret double [[MUL]] +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %f_prod.01 = phi double [ 0.000000e+00, %entry ], [ %mul, %for.body ] + %arrayidx = getelementptr inbounds [1000 x double], ptr %arr_d, i64 0, i64 %i.02 + %0 = load double, ptr %arrayidx, align 8 + %mul = fmul nnan double %f_prod.01, %0 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 1000 + br i1 %cmp, label %for.body, label %end + +end: ; preds = %for.body + %f_prod.0.lcssa = phi double [ %mul, %for.body ] + ret double %f_prod.0.lcssa +} + +define double @test_ninf_flag_enabled(ptr %arr_d) { +; CHECK-LABEL: @test_ninf_flag_enabled( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_02:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[F_PROD_01:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[MUL:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x double], ptr [[ARR_D:%.*]], i64 0, i64 [[I_02]] +; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[MUL]] = fmul ninf double [[F_PROD_01]], [[TMP0]] +; CHECK-NEXT: [[INC]] = add i64 [[I_02]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[INC]], 1000 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[END:%.*]] +; CHECK: end: +; CHECK-NEXT: ret double [[MUL]] +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %f_prod.01 = phi double [ 0.000000e+00, %entry ], [ %mul, %for.body ] + %arrayidx = getelementptr inbounds [1000 x double], ptr %arr_d, i64 0, i64 %i.02 + %0 = load double, ptr %arrayidx, align 8 + %mul = fmul ninf double %f_prod.01, %0 + %inc = add 
i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 1000 + br i1 %cmp, label %for.body, label %end + +end: ; preds = %for.body + %f_prod.0.lcssa = phi double [ %mul, %for.body ] + ret double %f_prod.0.lcssa +} + +define double @test_nsz_flag_enabled(ptr %arr_d) { +; CHECK-LABEL: @test_nsz_flag_enabled( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_02:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[F_PROD_01:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[MUL:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x double], ptr [[ARR_D:%.*]], i64 0, i64 [[I_02]] +; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[MUL]] = fmul nsz double [[F_PROD_01]], [[TMP0]] +; CHECK-NEXT: [[INC]] = add i64 [[I_02]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[INC]], 1000 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[END:%.*]] +; CHECK: end: +; CHECK-NEXT: ret double [[MUL]] +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %f_prod.01 = phi double [ 0.000000e+00, %entry ], [ %mul, %for.body ] + %arrayidx = getelementptr inbounds [1000 x double], ptr %arr_d, i64 0, i64 %i.02 + %0 = load double, ptr %arrayidx, align 8 + %mul = fmul nsz double %f_prod.01, %0 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 1000 + br i1 %cmp, label %for.body, label %end + +end: ; preds = %for.body + %f_prod.0.lcssa = phi double [ %mul, %for.body ] + ret double %f_prod.0.lcssa +} + +define double @test_phi_initalise_to_non_zero(ptr %arr_d) { +; CHECK-LABEL: @test_phi_initalise_to_non_zero( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_02:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[F_PROD_01:%.*]] = phi double [ 1.000000e+00, [[ENTRY]] ], [ [[MUL:%.*]], [[FOR_BODY]] 
] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x double], ptr [[ARR_D:%.*]], i64 0, i64 [[I_02]] +; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[MUL]] = fmul fast double [[F_PROD_01]], [[TMP0]] +; CHECK-NEXT: [[INC]] = add i64 [[I_02]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[INC]], 1000 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[END:%.*]] +; CHECK: end: +; CHECK-NEXT: ret double [[MUL]] +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %f_prod.01 = phi double [ 1.000000e+00, %entry ], [ %mul, %for.body ] + %arrayidx = getelementptr inbounds [1000 x double], ptr %arr_d, i64 0, i64 %i.02 + %0 = load double, ptr %arrayidx, align 8 + %mul = fmul fast double %f_prod.01, %0 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 1000 + br i1 %cmp, label %for.body, label %end + +end: ; preds = %for.body + %f_prod.0.lcssa = phi double [ %mul, %for.body ] + ret double %f_prod.0.lcssa +} + +define double @test_multiple_phi_operands(ptr %arr_d, i1 %entry_cond) { +; CHECK-LABEL: @test_multiple_phi_operands( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[ENTRY_COND:%.*]], label [[FOR_BODY:%.*]], label [[ENTRY_2:%.*]] +; CHECK: entry_2: +; CHECK-NEXT: br label [[FOR_BODY]] +; CHECK: for.body: +; CHECK-NEXT: [[I_02:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 0, [[ENTRY_2]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[F_PROD_01:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ 0.000000e+00, [[ENTRY_2]] ], [ [[MUL:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x double], ptr [[ARR_D:%.*]], i64 0, i64 [[I_02]] +; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[MUL]] = fmul fast double [[F_PROD_01]], [[TMP0]] +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_02]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[I_02]], 999 +; CHECK-NEXT: br i1 [[CMP]], label 
[[FOR_BODY]], label [[END:%.*]] +; CHECK: end: +; CHECK-NEXT: ret double [[MUL]] +; +entry: + br i1 %entry_cond, label %for.body, label %entry_2 + +entry_2: + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.02 = phi i64 [ 0, %entry ], [0, %entry_2], [ %inc, %for.body ] + %f_prod.01 = phi double [ 0.0, %entry ], [0.0, %entry_2], [ %mul, %for.body ] + %arrayidx = getelementptr inbounds [1000 x double], ptr %arr_d, i64 0, i64 %i.02 + %0 = load double, ptr %arrayidx, align 8 + %mul = fmul fast double %f_prod.01, %0 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 1000 + br i1 %cmp, label %for.body, label %end + +end: ; preds = %for.body + %f_prod.0.lcssa = phi double [ %mul, %for.body ] + ret double %f_prod.0.lcssa +} + +define double @test_multiple_phi_operands_with_non_zero(ptr %arr_d, i1 %entry_cond) { +; CHECK-LABEL: @test_multiple_phi_operands_with_non_zero( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[ENTRY_COND:%.*]], label [[FOR_BODY:%.*]], label [[ENTRY_2:%.*]] +; CHECK: entry_2: +; CHECK-NEXT: br label [[FOR_BODY]] +; CHECK: for.body: +; CHECK-NEXT: [[I_02:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 0, [[ENTRY_2]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[F_PROD_01:%.*]] = phi double [ 1.000000e+00, [[ENTRY]] ], [ 0.000000e+00, [[ENTRY_2]] ], [ [[MUL:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x double], ptr [[ARR_D:%.*]], i64 0, i64 [[I_02]] +; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[MUL]] = fmul fast double [[F_PROD_01]], [[TMP0]] +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_02]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[I_02]], 999 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[END:%.*]] +; CHECK: end: +; CHECK-NEXT: ret double [[MUL]] +; +entry: + br i1 %entry_cond, label %for.body, label %entry_2 + +entry_2: + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.02 = phi i64 [ 0, %entry ], [0, %entry_2], [ %inc, 
%for.body ] + %f_prod.01 = phi double [ 1.0, %entry ], [0.0, %entry_2], [ %mul, %for.body ] + %arrayidx = getelementptr inbounds [1000 x double], ptr %arr_d, i64 0, i64 %i.02 + %0 = load double, ptr %arrayidx, align 8 + %mul = fmul fast double %f_prod.01, %0 + %inc = add i64 %i.02, 1 + %cmp = icmp ult i64 %inc, 1000 + br i1 %cmp, label %for.body, label %end + +end: ; preds = %for.body + %f_prod.0.lcssa = phi double [ %mul, %for.body ] + ret double %f_prod.0.lcssa +} diff --git a/llvm/test/Transforms/InstCombine/snprintf-2.ll b/llvm/test/Transforms/InstCombine/snprintf-2.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/snprintf-2.ll @@ -0,0 +1,136 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; +; Verify that snprintf calls with a constant size not exceeding INT_MAX +; and constant format string with no formatting directives are transformed +; into memcpy. Also verify that a size in excess of INT_MAX prevents +; the transformation. +; +; RUN: opt < %s -passes=instcombine -S -data-layout="E" | FileCheck %s -check-prefixes=ANY,BE +; RUN: opt < %s -passes=instcombine -S -data-layout="e" | FileCheck %s -check-prefixes=ANY,LE + +@s = constant [4 x i8] c"123\00" + +@adst = external global [0 x i8*] +@asiz = external global [0 x i32] + +declare i32 @snprintf(i8*, i64, i8*, ...) + + +; Verify that all snprintf calls with a bound between INT_MAX and down +; to 0 are transformed to memcpy. 
+ +define void @fold_snprintf_fmt() { +; BE-LABEL: @fold_snprintf_fmt( +; BE-NEXT: [[PDIMAX1:%.*]] = load i32*, i32** bitcast (i8** getelementptr inbounds ([0 x i8*], [0 x i8*]* @adst, i64 0, i64 2147483647) to i32**), align 8 +; BE-NEXT: store i32 825373440, i32* [[PDIMAX1]], align 1 +; BE-NEXT: store i32 3, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 0), align 4 +; BE-NEXT: [[PD52:%.*]] = load i32*, i32** bitcast (i8** getelementptr inbounds ([0 x i8*], [0 x i8*]* @adst, i64 0, i64 5) to i32**), align 8 +; BE-NEXT: store i32 825373440, i32* [[PD52]], align 1 +; BE-NEXT: store i32 3, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 5), align 4 +; BE-NEXT: [[PD43:%.*]] = load i32*, i32** bitcast (i8** getelementptr inbounds ([0 x i8*], [0 x i8*]* @adst, i64 0, i64 4) to i32**), align 8 +; BE-NEXT: store i32 825373440, i32* [[PD43]], align 1 +; BE-NEXT: store i32 3, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 4), align 4 +; BE-NEXT: [[PD3:%.*]] = load i8*, i8** getelementptr inbounds ([0 x i8*], [0 x i8*]* @adst, i64 0, i64 3), align 8 +; BE-NEXT: [[TMP1:%.*]] = bitcast i8* [[PD3]] to i16* +; BE-NEXT: store i16 12594, i16* [[TMP1]], align 1 +; BE-NEXT: [[ENDPTR:%.*]] = getelementptr inbounds i8, i8* [[PD3]], i64 2 +; BE-NEXT: store i8 0, i8* [[ENDPTR]], align 1 +; BE-NEXT: store i32 3, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 3), align 4 +; BE-NEXT: [[PD2:%.*]] = load i8*, i8** getelementptr inbounds ([0 x i8*], [0 x i8*]* @adst, i64 0, i64 2), align 8 +; BE-NEXT: store i8 49, i8* [[PD2]], align 1 +; BE-NEXT: [[ENDPTR4:%.*]] = getelementptr inbounds i8, i8* [[PD2]], i64 1 +; BE-NEXT: store i8 0, i8* [[ENDPTR4]], align 1 +; BE-NEXT: store i32 3, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 2), align 4 +; BE-NEXT: [[PD1:%.*]] = load i8*, i8** getelementptr inbounds ([0 x i8*], [0 x i8*]* @adst, i64 0, i64 1), align 8 +; BE-NEXT: store i8 0, i8* 
[[PD1]], align 1 +; BE-NEXT: store i32 3, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 1), align 4 +; BE-NEXT: store i32 3, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 0), align 4 +; BE-NEXT: ret void +; +; LE-LABEL: @fold_snprintf_fmt( +; LE-NEXT: [[PDIMAX1:%.*]] = load i32*, i32** bitcast (i8** getelementptr inbounds ([0 x i8*], [0 x i8*]* @adst, i64 0, i64 2147483647) to i32**), align 8 +; LE-NEXT: store i32 3355185, i32* [[PDIMAX1]], align 1 +; LE-NEXT: store i32 3, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 0), align 4 +; LE-NEXT: [[PD52:%.*]] = load i32*, i32** bitcast (i8** getelementptr inbounds ([0 x i8*], [0 x i8*]* @adst, i64 0, i64 5) to i32**), align 8 +; LE-NEXT: store i32 3355185, i32* [[PD52]], align 1 +; LE-NEXT: store i32 3, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 5), align 4 +; LE-NEXT: [[PD43:%.*]] = load i32*, i32** bitcast (i8** getelementptr inbounds ([0 x i8*], [0 x i8*]* @adst, i64 0, i64 4) to i32**), align 8 +; LE-NEXT: store i32 3355185, i32* [[PD43]], align 1 +; LE-NEXT: store i32 3, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 4), align 4 +; LE-NEXT: [[PD3:%.*]] = load i8*, i8** getelementptr inbounds ([0 x i8*], [0 x i8*]* @adst, i64 0, i64 3), align 8 +; LE-NEXT: [[TMP1:%.*]] = bitcast i8* [[PD3]] to i16* +; LE-NEXT: store i16 12849, i16* [[TMP1]], align 1 +; LE-NEXT: [[ENDPTR:%.*]] = getelementptr inbounds i8, i8* [[PD3]], i64 2 +; LE-NEXT: store i8 0, i8* [[ENDPTR]], align 1 +; LE-NEXT: store i32 3, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 3), align 4 +; LE-NEXT: [[PD2:%.*]] = load i8*, i8** getelementptr inbounds ([0 x i8*], [0 x i8*]* @adst, i64 0, i64 2), align 8 +; LE-NEXT: store i8 49, i8* [[PD2]], align 1 +; LE-NEXT: [[ENDPTR4:%.*]] = getelementptr inbounds i8, i8* [[PD2]], i64 1 +; LE-NEXT: store i8 0, i8* [[ENDPTR4]], align 1 +; LE-NEXT: store i32 3, i32* getelementptr 
inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 2), align 4 +; LE-NEXT: [[PD1:%.*]] = load i8*, i8** getelementptr inbounds ([0 x i8*], [0 x i8*]* @adst, i64 0, i64 1), align 8 +; LE-NEXT: store i8 0, i8* [[PD1]], align 1 +; LE-NEXT: store i32 3, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 1), align 4 +; LE-NEXT: store i32 3, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 0), align 4 +; LE-NEXT: ret void +; + %fmt = getelementptr [4 x i8], [4 x i8]* @s, i32 0, i32 0 + + %pdimax = load i8*, i8** getelementptr ([0 x i8*], [0 x i8*]* @adst, i32 0, i32 2147483647) + %nimax = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %pdimax, i64 2147483647, i8* %fmt) + store i32 %nimax, i32* getelementptr ([0 x i32], [0 x i32]* @asiz, i32 0, i32 0) + + %pd5 = load i8*, i8** getelementptr ([0 x i8*], [0 x i8*]* @adst, i32 0, i32 5) + %n5 = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %pd5, i64 5, i8* %fmt) + store i32 %n5, i32* getelementptr ([0 x i32], [0 x i32]* @asiz, i32 0, i32 5) + + %pd4 = load i8*, i8** getelementptr ([0 x i8*], [0 x i8*]* @adst, i32 0, i32 4) + %n4 = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %pd4, i64 4, i8* %fmt) + store i32 %n4, i32* getelementptr ([0 x i32], [0 x i32]* @asiz, i32 0, i32 4) + + %pd3 = load i8*, i8** getelementptr ([0 x i8*], [0 x i8*]* @adst, i32 0, i32 3) + %n3 = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %pd3, i64 3, i8* %fmt) + store i32 %n3, i32* getelementptr ([0 x i32], [0 x i32]* @asiz, i32 0, i32 3) + + %pd2 = load i8*, i8** getelementptr ([0 x i8*], [0 x i8*]* @adst, i32 0, i32 2) + %n2 = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %pd2, i64 2, i8* %fmt) + store i32 %n2, i32* getelementptr ([0 x i32], [0 x i32]* @asiz, i32 0, i32 2) + + %pd1 = load i8*, i8** getelementptr ([0 x i8*], [0 x i8*]* @adst, i32 0, i32 1) + %n1 = call i32 (i8*, i64, i8*, ...) 
@snprintf(i8* %pd1, i64 1, i8* %fmt) + store i32 %n1, i32* getelementptr ([0 x i32], [0 x i32]* @asiz, i32 0, i32 1) + + %pd0 = load i8*, i8** getelementptr ([0 x i8*], [0 x i8*]* @adst, i32 0, i32 0) + %n0 = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %pd0, i64 0, i8* %fmt) + store i32 %n0, i32* getelementptr ([0 x i32], [0 x i32]* @asiz, i32 0, i32 0) + + ret void +} + + +; Verify that snprintf calls with a bound greater than INT_MAX are not +; transformed. POSIX requires implementations to set errno to EOVERFLOW +; so such calls could be folded to just that followed by returning -1. + +define void @call_snprintf_fmt_ximax() { +; ANY-LABEL: @call_snprintf_fmt_ximax( +; ANY-NEXT: [[PDM1:%.*]] = load i8*, i8** getelementptr inbounds ([0 x i8*], [0 x i8*]* @adst, i64 0, i64 1), align 8 +; ANY-NEXT: [[NM1:%.*]] = call i32 (i8*, i64, i8*, ...) @snprintf(i8* noundef nonnull dereferenceable(1) [[PDM1]], i64 -1, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @s, i64 0, i64 0)) +; ANY-NEXT: store i32 [[NM1]], i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 1), align 4 +; ANY-NEXT: [[PDIMAXP1:%.*]] = load i8*, i8** getelementptr inbounds ([0 x i8*], [0 x i8*]* @adst, i64 0, i64 0), align 8 +; ANY-NEXT: [[NIMAXP1:%.*]] = call i32 (i8*, i64, i8*, ...) @snprintf(i8* noundef nonnull dereferenceable(1) [[PDIMAXP1]], i64 2147483648, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @s, i64 0, i64 0)) +; ANY-NEXT: store i32 [[NIMAXP1]], i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 0), align 4 +; ANY-NEXT: ret void +; + %fmt = getelementptr [4 x i8], [4 x i8]* @s, i32 0, i32 0 + + %pdm1 = load i8*, i8** getelementptr ([0 x i8*], [0 x i8*]* @adst, i32 0, i32 1) + %nm1 = call i32 (i8*, i64, i8*, ...) 
@snprintf(i8* %pdm1, i64 -1, i8* %fmt) + store i32 %nm1, i32* getelementptr ([0 x i32], [0 x i32]* @asiz, i32 0, i32 1) + + %pdimaxp1 = load i8*, i8** getelementptr ([0 x i8*], [0 x i8*]* @adst, i32 0, i32 0) + %nimaxp1 = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %pdimaxp1, i64 2147483648, i8* %fmt) + store i32 %nimaxp1, i32* getelementptr ([0 x i32], [0 x i32]* @asiz, i32 0, i32 0) + + ret void +} diff --git a/llvm/test/Transforms/InstCombine/snprintf-3.ll b/llvm/test/Transforms/InstCombine/snprintf-3.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/snprintf-3.ll @@ -0,0 +1,139 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; +; Verify that snprintf calls with a constant size not exceeding INT_MAX +; and a "%s" format string and a const string argument are transformed +; into memcpy. Also verify that a size in excess of INT_MAX prevents +; the transformation. +; +; RUN: opt < %s -passes=instcombine -S -data-layout="E" | FileCheck %s -check-prefixes=ANY,BE +; RUN: opt < %s -passes=instcombine -S -data-layout="e" | FileCheck %s -check-prefixes=ANY,LE + +@pcnt_s = constant [3 x i8] c"%s\00" +@s = constant [4 x i8] c"123\00" + +@adst = external global [0 x i8*] +@asiz = external global [0 x i32] + +declare i32 @snprintf(i8*, i64, i8*, ...) + + +; Verify that all snprintf calls with a bound between INT_MAX and down +; to 0 are transformed to memcpy. 
+ +define void @fold_snprintf_pcnt_s() { +; BE-LABEL: @fold_snprintf_pcnt_s( +; BE-NEXT: [[PDIMAX1:%.*]] = load i32*, i32** bitcast (i8** getelementptr inbounds ([0 x i8*], [0 x i8*]* @adst, i64 0, i64 2147483647) to i32**), align 8 +; BE-NEXT: store i32 825373440, i32* [[PDIMAX1]], align 1 +; BE-NEXT: store i32 3, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 0), align 4 +; BE-NEXT: [[PD52:%.*]] = load i32*, i32** bitcast (i8** getelementptr inbounds ([0 x i8*], [0 x i8*]* @adst, i64 0, i64 5) to i32**), align 8 +; BE-NEXT: store i32 825373440, i32* [[PD52]], align 1 +; BE-NEXT: store i32 3, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 5), align 4 +; BE-NEXT: [[PD43:%.*]] = load i32*, i32** bitcast (i8** getelementptr inbounds ([0 x i8*], [0 x i8*]* @adst, i64 0, i64 4) to i32**), align 8 +; BE-NEXT: store i32 825373440, i32* [[PD43]], align 1 +; BE-NEXT: store i32 3, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 4), align 4 +; BE-NEXT: [[PD3:%.*]] = load i8*, i8** getelementptr inbounds ([0 x i8*], [0 x i8*]* @adst, i64 0, i64 3), align 8 +; BE-NEXT: [[TMP1:%.*]] = bitcast i8* [[PD3]] to i16* +; BE-NEXT: store i16 12594, i16* [[TMP1]], align 1 +; BE-NEXT: [[ENDPTR:%.*]] = getelementptr inbounds i8, i8* [[PD3]], i64 2 +; BE-NEXT: store i8 0, i8* [[ENDPTR]], align 1 +; BE-NEXT: store i32 3, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 3), align 4 +; BE-NEXT: [[PD2:%.*]] = load i8*, i8** getelementptr inbounds ([0 x i8*], [0 x i8*]* @adst, i64 0, i64 2), align 8 +; BE-NEXT: store i8 49, i8* [[PD2]], align 1 +; BE-NEXT: [[ENDPTR4:%.*]] = getelementptr inbounds i8, i8* [[PD2]], i64 1 +; BE-NEXT: store i8 0, i8* [[ENDPTR4]], align 1 +; BE-NEXT: store i32 3, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 2), align 4 +; BE-NEXT: [[PD1:%.*]] = load i8*, i8** getelementptr inbounds ([0 x i8*], [0 x i8*]* @adst, i64 0, i64 1), align 8 +; BE-NEXT: store i8 0, 
i8* [[PD1]], align 1 +; BE-NEXT: store i32 3, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 1), align 4 +; BE-NEXT: store i32 3, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 0), align 4 +; BE-NEXT: ret void +; +; LE-LABEL: @fold_snprintf_pcnt_s( +; LE-NEXT: [[PDIMAX1:%.*]] = load i32*, i32** bitcast (i8** getelementptr inbounds ([0 x i8*], [0 x i8*]* @adst, i64 0, i64 2147483647) to i32**), align 8 +; LE-NEXT: store i32 3355185, i32* [[PDIMAX1]], align 1 +; LE-NEXT: store i32 3, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 0), align 4 +; LE-NEXT: [[PD52:%.*]] = load i32*, i32** bitcast (i8** getelementptr inbounds ([0 x i8*], [0 x i8*]* @adst, i64 0, i64 5) to i32**), align 8 +; LE-NEXT: store i32 3355185, i32* [[PD52]], align 1 +; LE-NEXT: store i32 3, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 5), align 4 +; LE-NEXT: [[PD43:%.*]] = load i32*, i32** bitcast (i8** getelementptr inbounds ([0 x i8*], [0 x i8*]* @adst, i64 0, i64 4) to i32**), align 8 +; LE-NEXT: store i32 3355185, i32* [[PD43]], align 1 +; LE-NEXT: store i32 3, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 4), align 4 +; LE-NEXT: [[PD3:%.*]] = load i8*, i8** getelementptr inbounds ([0 x i8*], [0 x i8*]* @adst, i64 0, i64 3), align 8 +; LE-NEXT: [[TMP1:%.*]] = bitcast i8* [[PD3]] to i16* +; LE-NEXT: store i16 12849, i16* [[TMP1]], align 1 +; LE-NEXT: [[ENDPTR:%.*]] = getelementptr inbounds i8, i8* [[PD3]], i64 2 +; LE-NEXT: store i8 0, i8* [[ENDPTR]], align 1 +; LE-NEXT: store i32 3, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 3), align 4 +; LE-NEXT: [[PD2:%.*]] = load i8*, i8** getelementptr inbounds ([0 x i8*], [0 x i8*]* @adst, i64 0, i64 2), align 8 +; LE-NEXT: store i8 49, i8* [[PD2]], align 1 +; LE-NEXT: [[ENDPTR4:%.*]] = getelementptr inbounds i8, i8* [[PD2]], i64 1 +; LE-NEXT: store i8 0, i8* [[ENDPTR4]], align 1 +; LE-NEXT: store i32 3, i32* 
getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 2), align 4 +; LE-NEXT: [[PD1:%.*]] = load i8*, i8** getelementptr inbounds ([0 x i8*], [0 x i8*]* @adst, i64 0, i64 1), align 8 +; LE-NEXT: store i8 0, i8* [[PD1]], align 1 +; LE-NEXT: store i32 3, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 1), align 4 +; LE-NEXT: store i32 3, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 0), align 4 +; LE-NEXT: ret void +; + %fmt = getelementptr [3 x i8], [3 x i8]* @pcnt_s, i32 0, i32 0 + %ps = getelementptr [4 x i8], [4 x i8]* @s, i32 0, i32 0 + + %pdimax = load i8*, i8** getelementptr ([0 x i8*], [0 x i8*]* @adst, i32 0, i32 2147483647) + %nimax = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %pdimax, i64 2147483647, i8* %fmt, i8* %ps) + store i32 %nimax, i32* getelementptr ([0 x i32], [0 x i32]* @asiz, i32 0, i32 0) + + %pd5 = load i8*, i8** getelementptr ([0 x i8*], [0 x i8*]* @adst, i32 0, i32 5) + %n5 = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %pd5, i64 5, i8* %fmt, i8* %ps) + store i32 %n5, i32* getelementptr ([0 x i32], [0 x i32]* @asiz, i32 0, i32 5) + + %pd4 = load i8*, i8** getelementptr ([0 x i8*], [0 x i8*]* @adst, i32 0, i32 4) + %n4 = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %pd4, i64 4, i8* %fmt, i8* %ps) + store i32 %n4, i32* getelementptr ([0 x i32], [0 x i32]* @asiz, i32 0, i32 4) + + %pd3 = load i8*, i8** getelementptr ([0 x i8*], [0 x i8*]* @adst, i32 0, i32 3) + %n3 = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %pd3, i64 3, i8* %fmt, i8* %ps) + store i32 %n3, i32* getelementptr ([0 x i32], [0 x i32]* @asiz, i32 0, i32 3) + + %pd2 = load i8*, i8** getelementptr ([0 x i8*], [0 x i8*]* @adst, i32 0, i32 2) + %n2 = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %pd2, i64 2, i8* %fmt, i8* %ps) + store i32 %n2, i32* getelementptr ([0 x i32], [0 x i32]* @asiz, i32 0, i32 2) + + %pd1 = load i8*, i8** getelementptr ([0 x i8*], [0 x i8*]* @adst, i32 0, i32 1) + %n1 = call i32 (i8*, i64, i8*, ...) 
@snprintf(i8* %pd1, i64 1, i8* %fmt, i8* %ps) + store i32 %n1, i32* getelementptr ([0 x i32], [0 x i32]* @asiz, i32 0, i32 1) + + %pd0 = load i8*, i8** getelementptr ([0 x i8*], [0 x i8*]* @adst, i32 0, i32 0) + %n0 = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %pd0, i64 0, i8* %fmt, i8* %ps) + store i32 %n0, i32* getelementptr ([0 x i32], [0 x i32]* @asiz, i32 0, i32 0) + + ret void +} + + +; Verify that snprintf calls with a bound greater than INT_MAX are not +; transformed. POSIX requires implementations to set errno to EOVERFLOW +; so such calls could be folded to just that followed by returning -1. + +define void @call_snprintf_pcnt_s_ximax() { +; ANY-LABEL: @call_snprintf_pcnt_s_ximax( +; ANY-NEXT: [[PDM1:%.*]] = load i8*, i8** getelementptr inbounds ([0 x i8*], [0 x i8*]* @adst, i64 0, i64 1), align 8 +; ANY-NEXT: [[NM1:%.*]] = call i32 (i8*, i64, i8*, ...) @snprintf(i8* noundef nonnull dereferenceable(1) [[PDM1]], i64 -1, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @pcnt_s, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @s, i64 0, i64 0)) +; ANY-NEXT: store i32 [[NM1]], i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 1), align 4 +; ANY-NEXT: [[PDIMAXP1:%.*]] = load i8*, i8** getelementptr inbounds ([0 x i8*], [0 x i8*]* @adst, i64 0, i64 0), align 8 +; ANY-NEXT: [[NIMAXP1:%.*]] = call i32 (i8*, i64, i8*, ...) @snprintf(i8* noundef nonnull dereferenceable(1) [[PDIMAXP1]], i64 2147483648, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @pcnt_s, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @s, i64 0, i64 0)) +; ANY-NEXT: store i32 [[NIMAXP1]], i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 0), align 4 +; ANY-NEXT: ret void +; + %fmt = getelementptr [3 x i8], [3 x i8]* @pcnt_s, i32 0, i32 0 + %ps = getelementptr [4 x i8], [4 x i8]* @s, i32 0, i32 0 + + %pdm1 = load i8*, i8** getelementptr ([0 x i8*], [0 x i8*]* @adst, i32 0, i32 1) + %nm1 = call i32 (i8*, i64, i8*, ...) 
@snprintf(i8* %pdm1, i64 -1, i8* %fmt, i8* %ps) + store i32 %nm1, i32* getelementptr ([0 x i32], [0 x i32]* @asiz, i32 0, i32 1) + + %pdimaxp1 = load i8*, i8** getelementptr ([0 x i8*], [0 x i8*]* @adst, i32 0, i32 0) + %nimaxp1 = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %pdimaxp1, i64 2147483648, i8* %fmt, i8* %ps) + store i32 %nimaxp1, i32* getelementptr ([0 x i32], [0 x i32]* @asiz, i32 0, i32 0) + + ret void +} diff --git a/llvm/test/Transforms/InstCombine/snprintf-4.ll b/llvm/test/Transforms/InstCombine/snprintf-4.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/snprintf-4.ll @@ -0,0 +1,129 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; +; Verify that snprintf calls with a constant size not exceeding INT_MAX +; and a "%c" format string are transformed into a store of the character. +; Also verify that a size in excess of INT_MAX prevents the transformation. +; +; RUN: opt < %s -passes=instcombine -S | FileCheck %s + +@pcnt_c = constant [3 x i8] c"%c\00" + +@adst = external global [0 x i8*] +@asiz = external global [0 x i32] + +declare i32 @snprintf(i8*, i64, i8*, ...) + + +; Verify that all snprintf calls with a bound between INT_MAX and down +; to 0 are transformed to memcpy. 
+ +define void @fold_snprintf_pcnt_c(i32 %c) { +; CHECK-LABEL: @fold_snprintf_pcnt_c( +; CHECK-NEXT: [[PDIMAX:%.*]] = load i8*, i8** getelementptr inbounds ([0 x i8*], [0 x i8*]* @adst, i64 0, i64 0), align 8 +; CHECK-NEXT: store i8 1, i8* [[PDIMAX]], align 1 +; CHECK-NEXT: [[NUL:%.*]] = getelementptr inbounds i8, i8* [[PDIMAX]], i64 1 +; CHECK-NEXT: store i8 0, i8* [[NUL]], align 1 +; CHECK-NEXT: store i32 1, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 0), align 4 +; CHECK-NEXT: [[PD2:%.*]] = load i8*, i8** getelementptr inbounds ([0 x i8*], [0 x i8*]* @adst, i64 0, i64 1), align 8 +; CHECK-NEXT: store i8 2, i8* [[PD2]], align 1 +; CHECK-NEXT: [[NUL1:%.*]] = getelementptr inbounds i8, i8* [[PD2]], i64 1 +; CHECK-NEXT: store i8 0, i8* [[NUL1]], align 1 +; CHECK-NEXT: store i32 1, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 1), align 4 +; CHECK-NEXT: [[PD2_0:%.*]] = load i8*, i8** getelementptr inbounds ([0 x i8*], [0 x i8*]* @adst, i64 0, i64 2), align 8 +; CHECK-NEXT: store i8 0, i8* [[PD2_0]], align 1 +; CHECK-NEXT: [[NUL2:%.*]] = getelementptr inbounds i8, i8* [[PD2_0]], i64 1 +; CHECK-NEXT: store i8 0, i8* [[NUL2]], align 1 +; CHECK-NEXT: store i32 1, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 2), align 4 +; CHECK-NEXT: [[PD1:%.*]] = load i8*, i8** getelementptr inbounds ([0 x i8*], [0 x i8*]* @adst, i64 0, i64 3), align 8 +; CHECK-NEXT: store i8 0, i8* [[PD1]], align 1 +; CHECK-NEXT: store i32 1, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 3), align 4 +; CHECK-NEXT: store i32 1, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 4), align 4 +; CHECK-NEXT: [[PD2_C:%.*]] = load i8*, i8** getelementptr inbounds ([0 x i8*], [0 x i8*]* @adst, i64 0, i64 4), align 8 +; CHECK-NEXT: [[CHAR:%.*]] = trunc i32 [[C:%.*]] to i8 +; CHECK-NEXT: store i8 [[CHAR]], i8* [[PD2_C]], align 1 +; CHECK-NEXT: [[NUL3:%.*]] = getelementptr inbounds i8, i8* [[PD2_C]], 
i64 1 +; CHECK-NEXT: store i8 0, i8* [[NUL3]], align 1 +; CHECK-NEXT: store i32 1, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 4), align 4 +; CHECK-NEXT: [[PD1_C:%.*]] = load i8*, i8** getelementptr inbounds ([0 x i8*], [0 x i8*]* @adst, i64 0, i64 5), align 8 +; CHECK-NEXT: store i8 0, i8* [[PD1_C]], align 1 +; CHECK-NEXT: store i32 1, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 5), align 4 +; CHECK-NEXT: ret void +; + %fmt = getelementptr [3 x i8], [3 x i8]* @pcnt_c, i32 0, i32 0 + + ; Transform snprintf(dst, INT_MAX, "%c", 1) to memcpy(dst, "1", 2), 1. + %pdimax = load i8*, i8** getelementptr ([0 x i8*], [0 x i8*]* @adst, i32 0, i32 0) + %nimax = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %pdimax, i64 2147483647, i8* %fmt, i32 1) + store i32 %nimax, i32* getelementptr ([0 x i32], [0 x i32]* @asiz, i32 0, i32 0) + + ; Transform snprintf(dst, 2, "%c", '\2') to memcpy(dst, "2", 2), 1. + %pd2 = load i8*, i8** getelementptr ([0 x i8*], [0 x i8*]* @adst, i32 0, i32 1) + %n2 = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %pd2, i64 2, i8* %fmt, i8 2) + store i32 %n2, i32* getelementptr ([0 x i32], [0 x i32]* @asiz, i32 0, i32 1) + + ; Transform snprintf(dst, 2, "%c", '\0') to memcpy(dst, "\0", 2), 1. + %pd2_0 = load i8*, i8** getelementptr ([0 x i8*], [0 x i8*]* @adst, i32 0, i32 2) + %n2_0 = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %pd2_0, i64 2, i8* %fmt, i8 0) + store i32 %n2_0, i32* getelementptr ([0 x i32], [0 x i32]* @asiz, i32 0, i32 2) + + ; Transform snprintf(dst, 1, "%c", (short)3) to memcpy(dst, "\3", 2), 1. + %pd1 = load i8*, i8** getelementptr ([0 x i8*], [0 x i8*]* @adst, i32 0, i32 3) + %n1 = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %pd1, i64 1, i8* %fmt, i16 3) + store i32 %n1, i32* getelementptr ([0 x i32], [0 x i32]* @asiz, i32 0, i32 3) + + ; Fold snprintf(dst, 0, "%c", 4) to 1. + %pd0 = load i8*, i8** getelementptr ([0 x i8*], [0 x i8*]* @adst, i32 0, i32 4) + %n0 = call i32 (i8*, i64, i8*, ...) 
@snprintf(i8* %pd0, i64 0, i8* %fmt, i32 4) + store i32 %n0, i32* getelementptr ([0 x i32], [0 x i32]* @asiz, i32 0, i32 4) + + + ; Transform snprintf(dst, 2, "%c", c) with a nonconstant c to + ; dst[0] = c, dst[1] = '\0', 1. + %pd2_c = load i8*, i8** getelementptr ([0 x i8*], [0 x i8*]* @adst, i32 0, i32 4) + %n2_c = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %pd2_c, i64 2, i8* %fmt, i32 %c) + store i32 %n2_c, i32* getelementptr ([0 x i32], [0 x i32]* @asiz, i32 0, i32 4) + + ; Transform snprintf(dst, 1, "%c", c) with a nonconstant c to *dst = '\0', 0. + %pd1_c = load i8*, i8** getelementptr ([0 x i8*], [0 x i8*]* @adst, i32 0, i32 5) + %n1_c = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %pd1_c, i64 1, i8* %fmt, i32 %c) + store i32 %n1_c, i32* getelementptr ([0 x i32], [0 x i32]* @asiz, i32 0, i32 5) + + ret void +} + + +; Verify that snprintf calls with a bound greater than INT_MAX are not +; transformed. POSIX requires implementations to set errno to EOVERFLOW +; so such calls could be folded to just that followed by returning -1. + +define void @call_snprintf_pcnt_c_ximax(i32 %c) { +; CHECK-LABEL: @call_snprintf_pcnt_c_ximax( +; CHECK-NEXT: [[PDM1:%.*]] = load i8*, i8** getelementptr inbounds ([0 x i8*], [0 x i8*]* @adst, i64 0, i64 0), align 8 +; CHECK-NEXT: [[NM1:%.*]] = call i32 (i8*, i64, i8*, ...) @snprintf(i8* noundef nonnull dereferenceable(1) [[PDM1]], i64 -1, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @pcnt_c, i64 0, i64 0), i8 0) +; CHECK-NEXT: store i32 [[NM1]], i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 0), align 4 +; CHECK-NEXT: [[PDIMAXP1:%.*]] = load i8*, i8** getelementptr inbounds ([0 x i8*], [0 x i8*]* @adst, i64 0, i64 1), align 8 +; CHECK-NEXT: [[NIMAXP1:%.*]] = call i32 (i8*, i64, i8*, ...) 
@snprintf(i8* noundef nonnull dereferenceable(1) [[PDIMAXP1]], i64 2147483648, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @pcnt_c, i64 0, i64 0), i8 1) +; CHECK-NEXT: store i32 [[NIMAXP1]], i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 1), align 4 +; CHECK-NEXT: [[PDM1SL32:%.*]] = load i8*, i8** getelementptr inbounds ([0 x i8*], [0 x i8*]* @adst, i64 0, i64 2), align 8 +; CHECK-NEXT: [[NM1SL32:%.*]] = call i32 (i8*, i64, i8*, ...) @snprintf(i8* noundef nonnull dereferenceable(1) [[PDM1SL32]], i64 -4294967296, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @pcnt_c, i64 0, i64 0), i8 1) +; CHECK-NEXT: store i32 [[NM1SL32]], i32* getelementptr inbounds ([0 x i32], [0 x i32]* @asiz, i64 0, i64 2), align 4 +; CHECK-NEXT: ret void +; + %fmt = getelementptr [3 x i8], [3 x i8]* @pcnt_c, i32 0, i32 0 + + %pdm1 = load i8*, i8** getelementptr ([0 x i8*], [0 x i8*]* @adst, i32 0, i32 0) + %nm1 = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %pdm1, i64 -1, i8* %fmt, i8 0) + store i32 %nm1, i32* getelementptr ([0 x i32], [0 x i32]* @asiz, i32 0, i32 0) + + + %pdimaxp1 = load i8*, i8** getelementptr ([0 x i8*], [0 x i8*]* @adst, i32 0, i32 1) + %nimaxp1 = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %pdimaxp1, i64 2147483648, i8* %fmt, i8 1) + store i32 %nimaxp1, i32* getelementptr ([0 x i32], [0 x i32]* @asiz, i32 0, i32 1) + + ; Exercise snprintf(dst, -1LU << 32, "%c", c). + %pdm1sl32 = load i8*, i8** getelementptr ([0 x i8*], [0 x i8*]* @adst, i32 0, i32 2) + %nm1sl32 = call i32 (i8*, i64, i8*, ...) 
@snprintf(i8* %pdm1sl32, i64 18446744069414584320, i8* %fmt, i8 1) + store i32 %nm1sl32, i32* getelementptr ([0 x i32], [0 x i32]* @asiz, i32 0, i32 2) + + ret void +} diff --git a/llvm/test/Transforms/InstCombine/snprintf.ll b/llvm/test/Transforms/InstCombine/snprintf.ll --- a/llvm/test/Transforms/InstCombine/snprintf.ll +++ b/llvm/test/Transforms/InstCombine/snprintf.ll @@ -92,10 +92,10 @@ ret i32 %call } -define i32 @test_char_wrong_size(i8* %buf) #0 { -; CHECK-LABEL: @test_char_wrong_size( -; CHECK-NEXT: [[CALL:%.*]] = call i32 (i8*, i64, i8*, ...) @snprintf(i8* noundef nonnull dereferenceable(1) [[BUF:%.*]], i64 1, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.2, i64 0, i64 0), i32 65) -; CHECK-NEXT: ret i32 [[CALL]] +define i32 @test_char_small_size(i8* %buf) #0 { +; CHECK-LABEL: @test_char_small_size( +; CHECK-NEXT: store i8 0, i8* [[BUF:%.*]], align 1 +; CHECK-NEXT: ret i32 1 ; %call = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %buf, i64 1, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.2, i64 0, i64 0), i32 65) #2 ret i32 %call @@ -120,10 +120,10 @@ ret i32 %call } -define i32 @test_str_wrong_size(i8* %buf) #0 { -; CHECK-LABEL: @test_str_wrong_size( -; CHECK-NEXT: [[CALL:%.*]] = call i32 (i8*, i64, i8*, ...) @snprintf(i8* noundef nonnull dereferenceable(1) [[BUF:%.*]], i64 1, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.3, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0)) -; CHECK-NEXT: ret i32 [[CALL]] +define i32 @test_str_small_size(i8* %buf) #0 { +; CHECK-LABEL: @test_str_small_size( +; CHECK-NEXT: store i8 0, i8* [[BUF:%.*]], align 1 +; CHECK-NEXT: ret i32 3 ; %call = call i32 (i8*, i64, i8*, ...) 
@snprintf(i8* %buf, i64 1, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.3, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0)) #2 ret i32 %call diff --git a/llvm/test/Transforms/InstCombine/ssubo.ll b/llvm/test/Transforms/InstCombine/ssubo.ll --- a/llvm/test/Transforms/InstCombine/ssubo.ll +++ b/llvm/test/Transforms/InstCombine/ssubo.ll @@ -4,6 +4,8 @@ declare { i64, i1 } @llvm.ssub.with.overflow.i64(i64, i64) declare { i8, i1 } @llvm.ssub.with.overflow.i8(i8, i8) +declare void @use(i1) + define i1 @test_generic(i64 %a, i64 %b) { ; CHECK-LABEL: @test_generic( ; CHECK-NEXT: [[RES:%.*]] = tail call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 [[A:%.*]], i64 [[B:%.*]]) @@ -95,3 +97,72 @@ ret i1 %overflow } +define i1 @sub_eq0(i8 %x, i8 %y, i1 %b) { +; CHECK-LABEL: @sub_eq0( +; CHECK-NEXT: [[SS:%.*]] = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[OV:%.*]] = extractvalue { i8, i1 } [[SS]], 1 +; CHECK-NEXT: call void @use(i1 [[OV]]) +; CHECK-NEXT: [[EQ0:%.*]] = icmp eq i8 [[X]], [[Y]] +; CHECK-NEXT: ret i1 [[EQ0]] +; + %ss = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %x, i8 %y) + %ov = extractvalue { i8, i1 } %ss, 1 + call void @use(i1 %ov) + %sub = extractvalue { i8, i1 } %ss, 0 + %eq0 = icmp eq i8 %sub, 0 + ret i1 %eq0 +} + +define i1 @sub_ne0(i8 %x, i8 %y, i1 %b) { +; CHECK-LABEL: @sub_ne0( +; CHECK-NEXT: [[SS:%.*]] = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[OV:%.*]] = extractvalue { i8, i1 } [[SS]], 1 +; CHECK-NEXT: call void @use(i1 [[OV]]) +; CHECK-NEXT: [[NE0:%.*]] = icmp ne i8 [[X]], [[Y]] +; CHECK-NEXT: ret i1 [[NE0]] +; + %ss = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %x, i8 %y) + %ov = extractvalue { i8, i1 } %ss, 1 + call void @use(i1 %ov) + %sub = extractvalue { i8, i1 } %ss, 0 + %ne0 = icmp ne i8 %sub, 0 + ret i1 %ne0 +} + +; negative test - need zero + +define i1 @sub_eq1(i8 %x, i8 %y, i1 %b) { +; CHECK-LABEL: 
@sub_eq1( +; CHECK-NEXT: [[SS:%.*]] = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[OV:%.*]] = extractvalue { i8, i1 } [[SS]], 1 +; CHECK-NEXT: call void @use(i1 [[OV]]) +; CHECK-NEXT: [[SUB:%.*]] = extractvalue { i8, i1 } [[SS]], 0 +; CHECK-NEXT: [[EQ1:%.*]] = icmp eq i8 [[SUB]], 1 +; CHECK-NEXT: ret i1 [[EQ1]] +; + %ss = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %x, i8 %y) + %ov = extractvalue { i8, i1 } %ss, 1 + call void @use(i1 %ov) + %sub = extractvalue { i8, i1 } %ss, 0 + %eq1 = icmp eq i8 %sub, 1 + ret i1 %eq1 +} + +; negative test - need equality pred + +define i1 @sub_sgt0(i8 %x, i8 %y, i1 %b) { +; CHECK-LABEL: @sub_sgt0( +; CHECK-NEXT: [[SS:%.*]] = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[OV:%.*]] = extractvalue { i8, i1 } [[SS]], 1 +; CHECK-NEXT: call void @use(i1 [[OV]]) +; CHECK-NEXT: [[SUB:%.*]] = extractvalue { i8, i1 } [[SS]], 0 +; CHECK-NEXT: [[SGT0:%.*]] = icmp sgt i8 [[SUB]], 0 +; CHECK-NEXT: ret i1 [[SGT0]] +; + %ss = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %x, i8 %y) + %ov = extractvalue { i8, i1 } %ss, 1 + call void @use(i1 %ov) + %sub = extractvalue { i8, i1 } %ss, 0 + %sgt0 = icmp sgt i8 %sub, 0 + ret i1 %sgt0 +} diff --git a/llvm/test/Transforms/InstCombine/strlcpy-1.ll b/llvm/test/Transforms/InstCombine/strlcpy-1.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/strlcpy-1.ll @@ -0,0 +1,357 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; Assertions have been autogenerated by utils/update_test_checks.py +; +; RUN: opt < %s -data-layout="E" -passes=instcombine -S | FileCheck %s --check-prefixes=ANY,BE +; RUN: opt < %s -data-layout="e" -passes=instcombine -S | FileCheck %s --check-prefixes=ANY,LE +; +; Test that the strncpy library call simplifier works correctly. 
+ +declare i64 @strlcpy(i8*, i8*, i64) + +declare void @sink(i8*, i64) + + +@s4 = constant [5 x i8] c"1234\00" + + +; Verify that strlcpy(D, "", N) calls are transformed to a nul store +; to *D for nonzero N and folded to zero for all values of N. + +define void @fold_strlcpy_s0(i8* %dst) { +; ANY-LABEL: @fold_strlcpy_s0( +; ANY-NEXT: call void @sink(i8* [[DST:%.*]], i64 0) +; ANY-NEXT: store i8 0, i8* [[DST]], align 1 +; ANY-NEXT: call void @sink(i8* nonnull [[DST]], i64 0) +; ANY-NEXT: store i8 0, i8* [[DST]], align 1 +; ANY-NEXT: call void @sink(i8* nonnull [[DST]], i64 0) +; ANY-NEXT: ret void +; + %ps0 = getelementptr [5 x i8], [5 x i8]* @s4, i32 0, i32 4 + +; Fold strlcpy(D, "", 0) to just 0. + %ns0_0 = call i64 @strlcpy(i8* %dst, i8* %ps0, i64 0) + call void @sink(i8* %dst, i64 %ns0_0) + +; Transform strlcpy(D, "", 1) to *D = '\0, 0. + %ns0_1 = call i64 @strlcpy(i8* %dst, i8* %ps0, i64 1) + call void @sink(i8* %dst, i64 %ns0_1) + +; Transform strlcpy(D, "", SIZE_MAX) to *D = '\0, 0. + %ns0_m1 = call i64 @strlcpy(i8* %dst, i8* %ps0, i64 -1) + call void @sink(i8* %dst, i64 %ns0_m1) + + ret void +} + + +; Verify that strlcpy(D, "4", N) calls are transformed to a store to +; D[0] for nonzero N (and a nul store to D[1] for N greater than 1) +; and folded to 1 for all values of N. 
+ +define void @fold_strlcpy_s1(i8* %dst) { +; BE-LABEL: @fold_strlcpy_s1( +; BE-NEXT: call void @sink(i8* [[DST:%.*]], i64 1) +; BE-NEXT: store i8 0, i8* [[DST]], align 1 +; BE-NEXT: call void @sink(i8* nonnull [[DST]], i64 1) +; BE-NEXT: [[TMP1:%.*]] = bitcast i8* [[DST]] to i16* +; BE-NEXT: store i16 13312, i16* [[TMP1]], align 1 +; BE-NEXT: call void @sink(i8* nonnull [[DST]], i64 1) +; BE-NEXT: [[TMP2:%.*]] = bitcast i8* [[DST]] to i16* +; BE-NEXT: store i16 13312, i16* [[TMP2]], align 1 +; BE-NEXT: call void @sink(i8* nonnull [[DST]], i64 1) +; BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[DST]] to i16* +; BE-NEXT: store i16 13312, i16* [[TMP3]], align 1 +; BE-NEXT: call void @sink(i8* nonnull [[DST]], i64 1) +; BE-NEXT: ret void +; +; LE-LABEL: @fold_strlcpy_s1( +; LE-NEXT: call void @sink(i8* [[DST:%.*]], i64 1) +; LE-NEXT: store i8 0, i8* [[DST]], align 1 +; LE-NEXT: call void @sink(i8* nonnull [[DST]], i64 1) +; LE-NEXT: [[TMP1:%.*]] = bitcast i8* [[DST]] to i16* +; LE-NEXT: store i16 52, i16* [[TMP1]], align 1 +; LE-NEXT: call void @sink(i8* nonnull [[DST]], i64 1) +; LE-NEXT: [[TMP2:%.*]] = bitcast i8* [[DST]] to i16* +; LE-NEXT: store i16 52, i16* [[TMP2]], align 1 +; LE-NEXT: call void @sink(i8* nonnull [[DST]], i64 1) +; LE-NEXT: [[TMP3:%.*]] = bitcast i8* [[DST]] to i16* +; LE-NEXT: store i16 52, i16* [[TMP3]], align 1 +; LE-NEXT: call void @sink(i8* nonnull [[DST]], i64 1) +; LE-NEXT: ret void +; + %ps1 = getelementptr [5 x i8], [5 x i8]* @s4, i32 0, i32 3 + +; Fold strlcpy(D, "4", 0) to 1. + %ns1_0 = call i64 @strlcpy(i8* %dst, i8* %ps1, i64 0) + call void @sink(i8* %dst, i64 %ns1_0) + +; Transform strlcpy(D, "4", 1) to *D = '\0', 1. + %ns1_1 = call i64 @strlcpy(i8* %dst, i8* %ps1, i64 1) + call void @sink(i8* %dst, i64 %ns1_1) + +; Transform strlcpy(D, "4", 2) to D[0] = '\4, D[1] = '\0', 1. + %ns1_2 = call i64 @strlcpy(i8* %dst, i8* %ps1, i64 2) + call void @sink(i8* %dst, i64 %ns1_2) + +; Transform strlcpy(D, "4", 3) to D[0] = '\4, D[1] = '\0', 1.. 
+ %ns1_3 = call i64 @strlcpy(i8* %dst, i8* %ps1, i64 3) + call void @sink(i8* %dst, i64 %ns1_3) + +; Transform strlcpy(D, "4", SIZE_MAX) to D[0] = '\4, D[1] = '\0', 1. + %ns1_m1 = call i64 @strlcpy(i8* %dst, i8* %ps1, i64 -1) + call void @sink(i8* %dst, i64 %ns1_m1) + + ret void +} + + +; Verify that strlcpy(D, "1234", N) calls are transformed to a copy of +; the N - 1 leading characters of the string to D and folded to 4 for +; all values of N. + +define void @fold_strlcpy_s5(i8* %dst) { +; BE-LABEL: @fold_strlcpy_s5( +; BE-NEXT: call void @sink(i8* [[DST:%.*]], i64 4) +; BE-NEXT: store i8 0, i8* [[DST]], align 1 +; BE-NEXT: call void @sink(i8* nonnull [[DST]], i64 4) +; BE-NEXT: store i8 49, i8* [[DST]], align 1 +; BE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1 +; BE-NEXT: store i8 0, i8* [[TMP1]], align 1 +; BE-NEXT: call void @sink(i8* nonnull [[DST]], i64 4) +; BE-NEXT: [[TMP2:%.*]] = bitcast i8* [[DST]] to i16* +; BE-NEXT: store i16 12594, i16* [[TMP2]], align 1 +; BE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 2 +; BE-NEXT: store i8 0, i8* [[TMP3]], align 1 +; BE-NEXT: call void @sink(i8* nonnull [[DST]], i64 4) +; BE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(3) [[DST]], i8* noundef nonnull align 1 dereferenceable(3) getelementptr inbounds ([5 x i8], [5 x i8]* @s4, i64 0, i64 0), i64 3, i1 false) +; BE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 3 +; BE-NEXT: store i8 0, i8* [[TMP4]], align 1 +; BE-NEXT: call void @sink(i8* nonnull [[DST]], i64 4) +; BE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(5) [[DST]], i8* noundef nonnull align 1 dereferenceable(5) getelementptr inbounds ([5 x i8], [5 x i8]* @s4, i64 0, i64 0), i64 5, i1 false) +; BE-NEXT: call void @sink(i8* nonnull [[DST]], i64 4) +; BE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(5) [[DST]], i8* noundef 
nonnull align 1 dereferenceable(5) getelementptr inbounds ([5 x i8], [5 x i8]* @s4, i64 0, i64 0), i64 5, i1 false) +; BE-NEXT: call void @sink(i8* nonnull [[DST]], i64 4) +; BE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(5) [[DST]], i8* noundef nonnull align 1 dereferenceable(5) getelementptr inbounds ([5 x i8], [5 x i8]* @s4, i64 0, i64 0), i64 5, i1 false) +; BE-NEXT: call void @sink(i8* nonnull [[DST]], i64 4) +; BE-NEXT: ret void +; +; LE-LABEL: @fold_strlcpy_s5( +; LE-NEXT: call void @sink(i8* [[DST:%.*]], i64 4) +; LE-NEXT: store i8 0, i8* [[DST]], align 1 +; LE-NEXT: call void @sink(i8* nonnull [[DST]], i64 4) +; LE-NEXT: store i8 49, i8* [[DST]], align 1 +; LE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1 +; LE-NEXT: store i8 0, i8* [[TMP1]], align 1 +; LE-NEXT: call void @sink(i8* nonnull [[DST]], i64 4) +; LE-NEXT: [[TMP2:%.*]] = bitcast i8* [[DST]] to i16* +; LE-NEXT: store i16 12849, i16* [[TMP2]], align 1 +; LE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 2 +; LE-NEXT: store i8 0, i8* [[TMP3]], align 1 +; LE-NEXT: call void @sink(i8* nonnull [[DST]], i64 4) +; LE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(3) [[DST]], i8* noundef nonnull align 1 dereferenceable(3) getelementptr inbounds ([5 x i8], [5 x i8]* @s4, i64 0, i64 0), i64 3, i1 false) +; LE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 3 +; LE-NEXT: store i8 0, i8* [[TMP4]], align 1 +; LE-NEXT: call void @sink(i8* nonnull [[DST]], i64 4) +; LE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(5) [[DST]], i8* noundef nonnull align 1 dereferenceable(5) getelementptr inbounds ([5 x i8], [5 x i8]* @s4, i64 0, i64 0), i64 5, i1 false) +; LE-NEXT: call void @sink(i8* nonnull [[DST]], i64 4) +; LE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(5) [[DST]], i8* noundef nonnull align 
1 dereferenceable(5) getelementptr inbounds ([5 x i8], [5 x i8]* @s4, i64 0, i64 0), i64 5, i1 false) +; LE-NEXT: call void @sink(i8* nonnull [[DST]], i64 4) +; LE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(5) [[DST]], i8* noundef nonnull align 1 dereferenceable(5) getelementptr inbounds ([5 x i8], [5 x i8]* @s4, i64 0, i64 0), i64 5, i1 false) +; LE-NEXT: call void @sink(i8* nonnull [[DST]], i64 4) +; LE-NEXT: ret void +; + %ps4 = getelementptr [5 x i8], [5 x i8]* @s4, i32 0, i32 0 + +; Fold strlcpy(D, "1234", 0) to 4. + %ns4_0 = call i64 @strlcpy(i8* %dst, i8* %ps4, i64 0) + call void @sink(i8* %dst, i64 %ns4_0) + +; Transform strlcpy(D, "1234", 1) to *D = '\0', 4. + %ns4_1 = call i64 @strlcpy(i8* %dst, i8* %ps4, i64 1) + call void @sink(i8* %dst, i64 %ns4_1) + +; Transform strlcpy(D, "1234", 2) to D[0] = '1', D[1] = '\0', 4. + %ns4_2 = call i64 @strlcpy(i8* %dst, i8* %ps4, i64 2) + call void @sink(i8* %dst, i64 %ns4_2) + +; Transform strlcpy(D, S="1234", 3) to memcpy(D, S, 2), D[2] = '\0', 4. + %ns4_3 = call i64 @strlcpy(i8* %dst, i8* %ps4, i64 3) + call void @sink(i8* %dst, i64 %ns4_3) + +; Transform strlcpy(D, S="1234", 4) to memcpy(D, S, 3), D[3] = '\0', 4. + %ns4_4 = call i64 @strlcpy(i8* %dst, i8* %ps4, i64 4) + call void @sink(i8* %dst, i64 %ns4_4) + +; Transform strlcpy(D, S="1234", 5) to memcpy(D, S, 5), 4. + %ns4_5 = call i64 @strlcpy(i8* %dst, i8* %ps4, i64 5) + call void @sink(i8* %dst, i64 %ns4_5) + +; Transform strlcpy(D, S="1234", 9) to memcpy(D, S, 5), 4. + %ns4_9 = call i64 @strlcpy(i8* %dst, i8* %ps4, i64 9) + call void @sink(i8* %dst, i64 %ns4_9) + +; Transform strlcpy(D, S="1234", SIZE_MAX) to memcpy(D, S, 5), 4. + %ns4_m1 = call i64 @strlcpy(i8* %dst, i8* %ps4, i64 -1) + call void @sink(i8* %dst, i64 %ns4_m1) + + ret void +} + +; Verify that strlcpy(D, S, 1) calls are transformed into a nul store +; to *D, strlcpy(D, S, 0) to a no-op, and the result of both folded +; to strlen(S). 
+ +define void @fold_strlcpy_s_0(i8* %dst, i8* %s, i64 %n) { +; ANY-LABEL: @fold_strlcpy_s_0( +; ANY-NEXT: store i8 0, i8* [[DST:%.*]], align 1 +; ANY-NEXT: [[STRLEN:%.*]] = call i64 @strlen(i8* noundef nonnull dereferenceable(1) [[S:%.*]]) +; ANY-NEXT: call void @sink(i8* nonnull [[DST]], i64 [[STRLEN]]) +; ANY-NEXT: [[STRLEN1:%.*]] = call i64 @strlen(i8* noundef nonnull dereferenceable(1) [[S]]) +; ANY-NEXT: call void @sink(i8* nonnull [[DST]], i64 [[STRLEN1]]) +; ANY-NEXT: [[STRLEN2:%.*]] = call i64 @strlen(i8* noundef nonnull dereferenceable(1) [[S]]) +; ANY-NEXT: call void @sink(i8* nonnull [[DST]], i64 [[STRLEN2]]) +; ANY-NEXT: ret void +; +; Transform strlcpy(D, S, 1) to *D = '\0', strlen(S). + %ns_1 = call i64 @strlcpy(i8* %dst, i8* %s, i64 1) + call void @sink(i8* %dst, i64 %ns_1) + +; For strlcpy(D, S, 0) to strlen(S). + %ns_0 = call i64 @strlcpy(i8* %dst, i8* %s, i64 0) + call void @sink(i8* %dst, i64 %ns_0) + + ; Verify that calling strlcpy with a null destination is also folded + ; (to match a possible extension of some implementations that emulate + ; snprintf(0, 0, "%s", S)). + %n0_s_0 = call i64 @strlcpy(i8* null, i8* %s, i64 0) + call void @sink(i8* %dst, i64 %n0_s_0) + + ret void +} + + +; Verify that strlcpy(D, S, N) calls are left alone when S and/or N are +; not known (except for the cases handled above). Also verify that they +; annotate the destination argument with the dereferenceable attribute +; only with nonzero N. 
+ +define void @call_strlcpy_s0_n(i8* %dst, i8* %s, i64 %n) { +; ANY-LABEL: @call_strlcpy_s0_n( +; ANY-NEXT: [[NS_2:%.*]] = call i64 @strlcpy(i8* noundef nonnull dereferenceable(1) [[DST:%.*]], i8* noundef nonnull dereferenceable(1) [[S:%.*]], i64 2) +; ANY-NEXT: call void @sink(i8* [[DST]], i64 [[NS_2]]) +; ANY-NEXT: [[NS_N:%.*]] = call i64 @strlcpy(i8* [[DST]], i8* noundef nonnull dereferenceable(1) [[S]], i64 [[N:%.*]]) +; ANY-NEXT: call void @sink(i8* [[DST]], i64 [[NS_N]]) +; ANY-NEXT: [[NZ:%.*]] = or i64 [[N]], 1 +; ANY-NEXT: [[NS_NZ:%.*]] = call i64 @strlcpy(i8* noundef nonnull dereferenceable(1) [[DST]], i8* noundef nonnull dereferenceable(1) [[S]], i64 [[NZ]]) +; ANY-NEXT: call void @sink(i8* [[DST]], i64 [[NS_NZ]]) +; ANY-NEXT: [[NS0_N:%.*]] = call i64 @strlcpy(i8* [[DST]], i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @s4, i64 0, i64 4), i64 [[N]]) +; ANY-NEXT: call void @sink(i8* [[DST]], i64 [[NS0_N]]) +; ANY-NEXT: [[NS1_N:%.*]] = call i64 @strlcpy(i8* [[DST]], i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @s4, i64 0, i64 3), i64 [[N]]) +; ANY-NEXT: call void @sink(i8* [[DST]], i64 [[NS1_N]]) +; ANY-NEXT: [[NS4_N:%.*]] = call i64 @strlcpy(i8* [[DST]], i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @s4, i64 0, i64 0), i64 [[N]]) +; ANY-NEXT: call void @sink(i8* [[DST]], i64 [[NS4_N]]) +; ANY-NEXT: ret void +; + %ns_2 = call i64 @strlcpy(i8* %dst, i8* %s, i64 2) + call void @sink(i8* %dst, i64 %ns_2) + + %ns_n = call i64 @strlcpy(i8* %dst, i8* %s, i64 %n) + call void @sink(i8* %dst, i64 %ns_n) + + %nz = or i64 %n, 1 + %ns_nz = call i64 @strlcpy(i8* %dst, i8* %s, i64 %nz) + call void @sink(i8* %dst, i64 %ns_nz) + + + %ps0 = getelementptr [5 x i8], [5 x i8]* @s4, i32 0, i32 4 + %ns0_n = call i64 @strlcpy(i8* %dst, i8* %ps0, i64 %n) + call void @sink(i8* %dst, i64 %ns0_n) + + %ps1 = getelementptr [5 x i8], [5 x i8]* @s4, i32 0, i32 3 + %ns1_n = call 
i64 @strlcpy(i8* %dst, i8* %ps1, i64 %n) + call void @sink(i8* %dst, i64 %ns1_n) + + %ps4 = getelementptr [5 x i8], [5 x i8]* @s4, i32 0, i32 0 + %ns4_n = call i64 @strlcpy(i8* %dst, i8* %ps4, i64 %n) + call void @sink(i8* %dst, i64 %ns4_n) + + ret void +} + + +@a5 = constant [5 x i8] c"12345" + +; Verify that the transformation behaves reasonably even when the source +; array is not a nul-terminated string as it's required to be (and doesn't +; for example attempt to read past its end). All the calls below are +; undefined so technically reading past the end would be fine but it's +; easy to avoid. + +define void @fold_strlcpy_a5(i8* %dst, i64 %n) { +; BE-LABEL: @fold_strlcpy_a5( +; BE-NEXT: call void @sink(i8* [[DST:%.*]], i64 5) +; BE-NEXT: store i8 0, i8* [[DST]], align 1 +; BE-NEXT: call void @sink(i8* nonnull [[DST]], i64 5) +; BE-NEXT: store i8 49, i8* [[DST]], align 1 +; BE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1 +; BE-NEXT: store i8 0, i8* [[TMP1]], align 1 +; BE-NEXT: call void @sink(i8* nonnull [[DST]], i64 5) +; BE-NEXT: [[TMP2:%.*]] = bitcast i8* [[DST]] to i32* +; BE-NEXT: store i32 825373492, i32* [[TMP2]], align 1 +; BE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4 +; BE-NEXT: store i8 0, i8* [[TMP3]], align 1 +; BE-NEXT: call void @sink(i8* nonnull [[DST]], i64 5) +; BE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(5) [[DST]], i8* noundef nonnull align 1 dereferenceable(5) getelementptr inbounds ([5 x i8], [5 x i8]* @a5, i64 0, i64 0), i64 5, i1 false) +; BE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 5 +; BE-NEXT: store i8 0, i8* [[TMP4]], align 1 +; BE-NEXT: call void @sink(i8* nonnull [[DST]], i64 5) +; BE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(5) [[DST]], i8* noundef nonnull align 1 dereferenceable(5) getelementptr inbounds ([5 x i8], [5 x i8]* @a5, i64 0, i64 0), i64 5, i1 false) +; 
BE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 5 +; BE-NEXT: store i8 0, i8* [[TMP5]], align 1 +; BE-NEXT: call void @sink(i8* nonnull [[DST]], i64 5) +; BE-NEXT: ret void +; +; LE-LABEL: @fold_strlcpy_a5( +; LE-NEXT: call void @sink(i8* [[DST:%.*]], i64 5) +; LE-NEXT: store i8 0, i8* [[DST]], align 1 +; LE-NEXT: call void @sink(i8* nonnull [[DST]], i64 5) +; LE-NEXT: store i8 49, i8* [[DST]], align 1 +; LE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1 +; LE-NEXT: store i8 0, i8* [[TMP1]], align 1 +; LE-NEXT: call void @sink(i8* nonnull [[DST]], i64 5) +; LE-NEXT: [[TMP2:%.*]] = bitcast i8* [[DST]] to i32* +; LE-NEXT: store i32 875770417, i32* [[TMP2]], align 1 +; LE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4 +; LE-NEXT: store i8 0, i8* [[TMP3]], align 1 +; LE-NEXT: call void @sink(i8* nonnull [[DST]], i64 5) +; LE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(5) [[DST]], i8* noundef nonnull align 1 dereferenceable(5) getelementptr inbounds ([5 x i8], [5 x i8]* @a5, i64 0, i64 0), i64 5, i1 false) +; LE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 5 +; LE-NEXT: store i8 0, i8* [[TMP4]], align 1 +; LE-NEXT: call void @sink(i8* nonnull [[DST]], i64 5) +; LE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(5) [[DST]], i8* noundef nonnull align 1 dereferenceable(5) getelementptr inbounds ([5 x i8], [5 x i8]* @a5, i64 0, i64 0), i64 5, i1 false) +; LE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 5 +; LE-NEXT: store i8 0, i8* [[TMP5]], align 1 +; LE-NEXT: call void @sink(i8* nonnull [[DST]], i64 5) +; LE-NEXT: ret void +; + %pa5 = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 0 + %na5_0 = call i64 @strlcpy(i8* %dst, i8* %pa5, i64 0) + call void @sink(i8* %dst, i64 %na5_0) + + %na5_1 = call i64 @strlcpy(i8* %dst, i8* %pa5, i64 1) + call void @sink(i8* %dst, i64 %na5_1) + + %na5_2 = call 
i64 @strlcpy(i8* %dst, i8* %pa5, i64 2) + call void @sink(i8* %dst, i64 %na5_2) + + %na5_5 = call i64 @strlcpy(i8* %dst, i8* %pa5, i64 5) + call void @sink(i8* %dst, i64 %na5_5) + + %na5_6 = call i64 @strlcpy(i8* %dst, i8* %pa5, i64 6) + call void @sink(i8* %dst, i64 %na5_6) + + %na5_9 = call i64 @strlcpy(i8* %dst, i8* %pa5, i64 9) + call void @sink(i8* %dst, i64 %na5_9) + + ret void +} diff --git a/llvm/test/Transforms/InstCombine/usubo.ll b/llvm/test/Transforms/InstCombine/usubo.ll --- a/llvm/test/Transforms/InstCombine/usubo.ll +++ b/llvm/test/Transforms/InstCombine/usubo.ll @@ -4,6 +4,8 @@ declare { i64, i1 } @llvm.usub.with.overflow.i64(i64, i64) declare { i8, i1 } @llvm.usub.with.overflow.i8(i8, i8) +declare void @use(i1) + define i1 @test_generic(i64 %a, i64 %b) { ; CHECK-LABEL: @test_generic( ; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp ult i64 [[A:%.*]], [[B:%.*]] @@ -94,3 +96,70 @@ ret i1 %overflow } +define i1 @sub_eq0(i8 %x, i8 %y, i1 %b) { +; CHECK-LABEL: @sub_eq0( +; CHECK-NEXT: [[OV:%.*]] = icmp ult i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: call void @use(i1 [[OV]]) +; CHECK-NEXT: [[EQ0:%.*]] = icmp eq i8 [[X]], [[Y]] +; CHECK-NEXT: ret i1 [[EQ0]] +; + %us = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 %x, i8 %y) + %ov = extractvalue { i8, i1 } %us, 1 + call void @use(i1 %ov) + %sub = extractvalue { i8, i1 } %us, 0 + %eq0 = icmp eq i8 %sub, 0 + ret i1 %eq0 +} + +define i1 @sub_ne0(i8 %x, i8 %y, i1 %b) { +; CHECK-LABEL: @sub_ne0( +; CHECK-NEXT: [[OV:%.*]] = icmp ult i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: call void @use(i1 [[OV]]) +; CHECK-NEXT: [[NE0:%.*]] = icmp ne i8 [[X]], [[Y]] +; CHECK-NEXT: ret i1 [[NE0]] +; + %us = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 %x, i8 %y) + %ov = extractvalue { i8, i1 } %us, 1 + call void @use(i1 %ov) + %sub = extractvalue { i8, i1 } %us, 0 + %ne0 = icmp ne i8 %sub, 0 + ret i1 %ne0 +} + +; negative test - need zero + +define i1 @sub_eq1(i8 %x, i8 %y, i1 %b) { +; CHECK-LABEL: @sub_eq1( +; CHECK-NEXT: [[SS:%.*]] = call { 
i8, i1 } @llvm.usub.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[OV:%.*]] = extractvalue { i8, i1 } [[SS]], 1 +; CHECK-NEXT: call void @use(i1 [[OV]]) +; CHECK-NEXT: [[SUB:%.*]] = extractvalue { i8, i1 } [[SS]], 0 +; CHECK-NEXT: [[EQ1:%.*]] = icmp eq i8 [[SUB]], 1 +; CHECK-NEXT: ret i1 [[EQ1]] +; + %ss = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 %x, i8 %y) + %ov = extractvalue { i8, i1 } %ss, 1 + call void @use(i1 %ov) + %sub = extractvalue { i8, i1 } %ss, 0 + %eq1 = icmp eq i8 %sub, 1 + ret i1 %eq1 +} + +; negative test - need equality pred + +define i1 @sub_sgt0(i8 %x, i8 %y, i1 %b) { +; CHECK-LABEL: @sub_sgt0( +; CHECK-NEXT: [[SS:%.*]] = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[OV:%.*]] = extractvalue { i8, i1 } [[SS]], 1 +; CHECK-NEXT: call void @use(i1 [[OV]]) +; CHECK-NEXT: [[SUB:%.*]] = extractvalue { i8, i1 } [[SS]], 0 +; CHECK-NEXT: [[SGT0:%.*]] = icmp sgt i8 [[SUB]], 0 +; CHECK-NEXT: ret i1 [[SGT0]] +; + %ss = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 %x, i8 %y) + %ov = extractvalue { i8, i1 } %ss, 1 + call void @use(i1 %ov) + %sub = extractvalue { i8, i1 } %ss, 0 + %sgt0 = icmp sgt i8 %sub, 0 + ret i1 %sgt0 +} diff --git a/llvm/test/Transforms/InstSimplify/AndOrXor.ll b/llvm/test/Transforms/InstSimplify/AndOrXor.ll --- a/llvm/test/Transforms/InstSimplify/AndOrXor.ll +++ b/llvm/test/Transforms/InstSimplify/AndOrXor.ll @@ -1126,3 +1126,17 @@ %r = and i8 %b, 126 ret i8 %r } + +; hidden simplifydemandedbits constant. 
+define i8 @noop_and_t2(i8 %x) { +; CHECK-LABEL: @noop_and_t2( +; CHECK-NEXT: [[A:%.*]] = and i8 [[X:%.*]], 2 +; CHECK-NEXT: [[B:%.*]] = or i8 [[A]], 127 +; CHECK-NEXT: [[C:%.*]] = and i8 [[B]], 62 +; CHECK-NEXT: ret i8 [[C]] +; + %a = and i8 %x, 2 + %b = or i8 %a, 127 + %c = and i8 %b, 62 + ret i8 %c +} diff --git a/llvm/test/Transforms/InstSimplify/implies.ll b/llvm/test/Transforms/InstSimplify/implies.ll --- a/llvm/test/Transforms/InstSimplify/implies.ll +++ b/llvm/test/Transforms/InstSimplify/implies.ll @@ -255,3 +255,15 @@ %res = icmp sge i1 %var30, %var29 ret i1 %res } + +; X <=(s) Y == Y ==> X (i1 1 becomes -1 for reasoning) +define i1 @test_sle(i32 %length.i, i32 %i) { +; CHECK-LABEL: @test_sle( +; CHECK-NEXT: ret i1 true +; + %iplus1 = add nsw nuw i32 %i, 1 + %var29 = icmp ult i32 %i, %length.i + %var30 = icmp ult i32 %iplus1, %length.i + %res = icmp sle i1 %var29, %var30 + ret i1 %res +} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/nontemporal-load-store.ll b/llvm/test/Transforms/LoopVectorize/AArch64/nontemporal-load-store.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/nontemporal-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/nontemporal-load-store.ll @@ -256,4 +256,247 @@ ret void } +define i4 @test_i4_load(i4* %ddst) { +; CHECK-LABEL: define i4 @test_i4_load +; CHECK-LABEL: vector.body: +; CHECK: [[LOAD:%.*]] = load i4, i4* {{.*}}, align 1, !nontemporal !0 +; CHECK: ret i4 %{{.*}} +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %acc.08 = phi i4 [ 0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i4, i4* %ddst, i64 %indvars.iv + %l = load i4, i4* %arrayidx, align 1, !nontemporal !8 + %add = add i4 %l, %acc.08 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, 4092 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = 
%for.body + ret i4 %add +} + +define i8 @test_load_i8(i8* %ddst) { +; CHECK-LABEL: @test_load_i8( +; CHECK-NOT: vector.body: +; CHECK: ret i8 %{{.*}} +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %acc.08 = phi i8 [ 0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i8, i8* %ddst, i64 %indvars.iv + %l = load i8, i8* %arrayidx, align 1, !nontemporal !8 + %add = add i8 %l, %acc.08 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, 4092 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body + ret i8 %add +} + +define half @test_half_load(half* %ddst) { +; CHECK-LABEL: @test_half_load +; CHECK-NOT: vector.body: +; CHECK: ret half %{{.*}} +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %acc.08 = phi half [ 0.0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds half, half* %ddst, i64 %indvars.iv + %l = load half, half* %arrayidx, align 2, !nontemporal !8 + %add = fadd half %l, %acc.08 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, 4092 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body + ret half %add +} + +define i16 @test_i16_load(i16* %ddst) { +; CHECK-LABEL: @test_i16_load +; CHECK-NOT: vector.body: +; CHECK: ret i16 %{{.*}} +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %acc.08 = phi i16 [ 0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i16, i16* %ddst, i64 %indvars.iv + %l = load i16, i16* %arrayidx, align 2, !nontemporal !8 + %add = add i16 %l, %acc.08 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + 
%exitcond.not = icmp eq i64 %indvars.iv.next, 4092 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body + ret i16 %add +} + +define i32 @test_i32_load(i32* %ddst) { +; CHECK-LABEL: @test_i32_load +; CHECK-NOT: vector.body: +; CHECK: ret i32 %{{.*}} +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %acc.08 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %ddst, i64 %indvars.iv + %l = load i32, i32* %arrayidx, align 4, !nontemporal !8 + %add = add i32 %l, %acc.08 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, 4092 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body + ret i32 %add +} + +define i33 @test_i33_load(i33* %ddst) { +; CHECK-LABEL: @test_i33_load +; CHECK-NOT: vector.body: +; CHECK: ret i33 %{{.*}} +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %acc.08 = phi i33 [ 0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i33, i33* %ddst, i64 %indvars.iv + %l = load i33, i33* %arrayidx, align 4, !nontemporal !8 + %add = add i33 %l, %acc.08 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, 4092 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body + ret i33 %add +} + +define i40 @test_i40_load(i40* %ddst) { +; CHECK-LABEL: @test_i40_load +; CHECK-NOT: vector.body: +; CHECK: ret i40 %{{.*}} +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %acc.08 = phi i40 [ 0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i40, i40* %ddst, i64 %indvars.iv + %l = 
load i40, i40* %arrayidx, align 4, !nontemporal !8 + %add = add i40 %l, %acc.08 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, 4092 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body + ret i40 %add +} + +define i64 @test_i64_load(i64* %ddst) { +; CHECK-LABEL: @test_i64_load +; CHECK-NOT: vector.body: +; CHECK: ret i64 %{{.*}} +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %acc.08 = phi i64 [ 0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i64, i64* %ddst, i64 %indvars.iv + %l = load i64, i64* %arrayidx, align 4, !nontemporal !8 + %add = add i64 %l, %acc.08 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, 4092 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body + ret i64 %add +} + +define double @test_double_load(double* %ddst) { +; CHECK-LABEL: @test_double_load +; CHECK-NOT: vector.body: +; CHECK: ret double %{{.*}} +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %acc.08 = phi double [ 0.0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds double, double* %ddst, i64 %indvars.iv + %l = load double, double* %arrayidx, align 4, !nontemporal !8 + %add = fadd double %l, %acc.08 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, 4092 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body + ret double %add +} + +define i128 @test_i128_load(i128* %ddst) { +; CHECK-LABEL: @test_i128_load +; CHECK-NOT: vector.body: +; CHECK: ret i128 %{{.*}} +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 
0, %entry ], [ %indvars.iv.next, %for.body ] + %acc.08 = phi i128 [ 0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i128, i128* %ddst, i64 %indvars.iv + %l = load i128, i128* %arrayidx, align 4, !nontemporal !8 + %add = add i128 %l, %acc.08 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, 4092 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body + ret i128 %add +} + +define i256 @test_256_load(i256* %ddst) { +; CHECK-LABEL: @test_256_load +; CHECK-NOT: vector.body: +; CHECK: ret i256 %{{.*}} +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %acc.08 = phi i256 [ 0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i256, i256* %ddst, i64 %indvars.iv + %l = load i256, i256* %arrayidx, align 4, !nontemporal !8 + %add = add i256 %l, %acc.08 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, 4092 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body + ret i256 %add +} + !8 = !{i32 1} diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-loop-hint.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-loop-hint.ll --- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-loop-hint.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-loop-hint.ll @@ -14,7 +14,9 @@ ; CHECK: vector.body: ; CHECK-NOT: call <4 x i32> @llvm.masked.load.v4i32.p0v4i32( ; CHECK-NOT: call void @llvm.masked.store.v4i32.p0v4i32( -; CHECK: br i1 %{{.*}}, label %{{.*}}, label %vector.body +; CHECK: br i1 %{{.*}}, label %{{.*}}, label %vector.body, !llvm.loop [[VEC_LOOP1:![0-9]+]] +; +; CHECK: br i1 %{{.*}}, label %{{.*}}, label %for.body, !llvm.loop [[SCALAR_LOOP1:![0-9]+]] entry: br label %for.body @@ -49,7 +51,9 @@ ; CHECK: %[[ADD:.*]] = add nsw <4 
x i32> %[[WML2]], %[[WML1]] ; CHECK: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %[[ADD]], {{.*}}<4 x i1> %active.lane.mask ; CHECK: %index.next = add i64 %index, 4 -; CHECK: br i1 %{{.*}}, label %{{.*}}, label %vector.body +; CHECK: br i1 %{{.*}}, label %{{.*}}, label %vector.body, !llvm.loop [[VEC_LOOP2:![0-9]+]] +; +; CHECK: br i1 %{{.*}}, label %{{.*}}, label %for.body, !llvm.loop [[SCALAR_LOOP2:![0-9]+]] entry: br label %for.body @@ -70,12 +74,12 @@ br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !6 } -; CHECK: !0 = distinct !{!0, !1} -; CHECK-NEXT: !1 = !{!"llvm.loop.isvectorized", i32 1} -; CHECK-NEXT: !2 = distinct !{!2, !3, !1} -; CHECK-NEXT: !3 = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK-NEXT: !4 = distinct !{!4, !1} -; CHECK-NEXT: !5 = distinct !{!5, !3, !1} +; CHECK: [[VEC_LOOP1]] = distinct !{[[VEC_LOOP1]], [[MD_IS_VEC:![0-9]+]]} +; CHECK-NEXT: [[MD_IS_VEC:![0-9]+]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK-NEXT: [[SCALAR_LOOP1]] = distinct !{[[SCALAR_LOOP1]], [[MD_RT_UNROLL_DIS:![0-9]+]], [[MD_IS_VEC]]} +; CHECK-NEXT: [[MD_RT_UNROLL_DIS]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK-NEXT: [[VEC_LOOP2]] = distinct !{[[VEC_LOOP2]], [[MD_IS_VEC]]} +; CHECK-NEXT: [[SCALAR_LOOP2]] = distinct !{[[SCALAR_LOOP2]], [[MD_RT_UNROLL_DIS]], [[MD_IS_VEC]]} !6 = distinct !{!6, !7, !8} !7 = !{!"llvm.loop.vectorize.predicate.enable", i1 true} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll b/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll @@ -0,0 +1,248 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -riscv-v-vector-bits-min=-1 -mtriple riscv64-linux-gnu -mattr=+v,+f -S 2>%t | FileCheck %s + +target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" +target triple = "riscv64" + +; 
Dependence distance between read and write is greater than the trip +; count of the loop. Thus, values written are never read for any +; valid vectorization of the loop. +define void @test(ptr %p) { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 32 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP0]], 200 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[TMP4]], i32 0 +; CHECK-NEXT: store <2 x i64> [[WIDE_LOAD]], ptr [[TMP5]], align 32 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 200, 200 +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]] +; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[A1]], align 32 +; CHECK-NEXT: [[OFFSET:%.*]] = add i64 [[IV]], 200 +; CHECK-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]] +; CHECK-NEXT: store i64 [[V]], ptr [[A2]], align 32 
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[IV]], 199 +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [0, %entry], [%iv.next, %loop] + %a1 = getelementptr i64, ptr %p, i64 %iv + %v = load i64, ptr %a1, align 32 + %offset = add i64 %iv, 200 + %a2 = getelementptr i64, ptr %p, i64 %offset + store i64 %v, ptr %a2, align 32 + %iv.next = add i64 %iv, 1 + %cmp = icmp ne i64 %iv, 199 + br i1 %cmp, label %loop, label %exit + +exit: + ret void +} + +; Dependence distance is less than trip count, thus we must prove that +; chosen VF guaranteed to be less than dependence distance. +define void @test_may_clobber(ptr %p) { +; CHECK-LABEL: @test_may_clobber( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 32 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP0]], 100 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[TMP4]], i32 0 +; CHECK-NEXT: store <2 x i64> [[WIDE_LOAD]], ptr [[TMP5]], align 32 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 200, 200 +; CHECK-NEXT: br i1 [[CMP_N]], label 
[[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]] +; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[A1]], align 32 +; CHECK-NEXT: [[OFFSET:%.*]] = add i64 [[IV]], 100 +; CHECK-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]] +; CHECK-NEXT: store i64 [[V]], ptr [[A2]], align 32 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[IV]], 199 +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [0, %entry], [%iv.next, %loop] + %a1 = getelementptr i64, ptr %p, i64 %iv + %v = load i64, ptr %a1, align 32 + %offset = add i64 %iv, 100 + %a2 = getelementptr i64, ptr %p, i64 %offset + store i64 %v, ptr %a2, align 32 + %iv.next = add i64 %iv, 1 + %cmp = icmp ne i64 %iv, 199 + br i1 %cmp, label %loop, label %exit + +exit: + ret void +} + +; Trivially no overlap due to maximum possible value of VLEN and LMUL +define void @trivial_due_max_vscale(ptr %p) { +; CHECK-LABEL: @trivial_due_max_vscale( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 200, [[TMP0]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 200, [[TMP1]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 200, [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; 
CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[TMP3]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 32 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], 8192 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP6]], i32 0 +; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP7]], align 32 +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 200, [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]] +; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[A1]], align 32 +; CHECK-NEXT: [[OFFSET:%.*]] = add i64 [[IV]], 8192 +; CHECK-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]] +; CHECK-NEXT: store i64 [[V]], ptr [[A2]], align 32 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[IV]], 199 +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [0, %entry], [%iv.next, %loop] + %a1 = getelementptr i64, ptr %p, i64 %iv + %v = load i64, ptr %a1, align 32 + %offset = add i64 %iv, 8192 + %a2 = getelementptr 
i64, ptr %p, i64 %offset + store i64 %v, ptr %a2, align 32 + %iv.next = add i64 %iv, 1 + %cmp = icmp ne i64 %iv, 199 + br i1 %cmp, label %loop, label %exit + +exit: + ret void +} + +; Dependence distance could be violated via LMUL>=2 or interleaving +define void @no_high_lmul_or_interleave(ptr %p) { +; CHECK-LABEL: @no_high_lmul_or_interleave( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 200, [[TMP0]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 200, [[TMP1]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 200, [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[TMP3]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 32 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], 1024 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP6]], i32 0 +; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP7]], align 32 +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 200, [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], 
[[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]] +; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[A1]], align 32 +; CHECK-NEXT: [[OFFSET:%.*]] = add i64 [[IV]], 1024 +; CHECK-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]] +; CHECK-NEXT: store i64 [[V]], ptr [[A2]], align 32 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[IV]], 199 +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [0, %entry], [%iv.next, %loop] + %a1 = getelementptr i64, ptr %p, i64 %iv + %v = load i64, ptr %a1, align 32 + %offset = add i64 %iv, 1024 + %a2 = getelementptr i64, ptr %p, i64 %offset + store i64 %v, ptr %a2, align 32 + %iv.next = add i64 %iv, 1 + %cmp = icmp ne i64 %iv, 199 + br i1 %cmp, label %loop, label %exit + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll b/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll --- a/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll @@ -16,8 +16,8 @@ ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[INDUCTION]], +; CHECK-NEXT: [[VEC_IV:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], ; CHECK-NEXT: [[TMP2:%.*]] = 
getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <8 x i32>* @@ -33,7 +33,7 @@ ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> [[TMP8]], <8 x i32>* [[TMP11]], i32 4, <8 x i1> [[TMP1]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 432 -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -52,7 +52,7 @@ ; CHECK-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX4]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 430 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; entry: br label %for.body @@ -87,8 +87,8 @@ ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[INDUCTION]], +; CHECK-NEXT: [[VEC_IV:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 0 ; 
CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <8 x i32>* @@ -104,7 +104,7 @@ ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> [[TMP8]], <8 x i32>* [[TMP11]], i32 4, <8 x i1> [[TMP1]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 432 -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -123,7 +123,7 @@ ; CHECK-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX4]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 430 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], [[LOOP5:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; entry: br label %for.body @@ -176,8 +176,8 @@ ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT1]], <8 x i64> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT2]], -; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <8 x i64> [[INDUCTION]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[VEC_IV:%.*]] = add <8 x i64> [[BROADCAST_SPLAT2]], +; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <8 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <8 x i32>* @@ -191,7 +191,7 @@ ; 
CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP4]], <8 x i32> [[TMP12]], <8 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP13]]) ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] @@ -211,7 +211,7 @@ ; CHECK-NEXT: [[SUM_1]] = add nuw nsw i32 [[ADD]], [[SUM_0]] ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], [[LOOP7:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[SUM_1_LCSSA:%.*]] = phi i32 [ [[SUM_1]], [[FOR_BODY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[SUM_1_LCSSA]] @@ -237,12 +237,12 @@ ret i32 %sum.1 } -; CHECK: !0 = distinct !{!0, !1} -; CHECK-NEXT: !1 = !{!"llvm.loop.isvectorized", i32 1} -; CHECK-NEXT: !2 = distinct !{!2, !3, !1} -; CHECK-NEXT: !3 = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK-NEXT: !4 = distinct !{!4, !1} -; CHECK-NEXT: !5 = distinct !{!5, !3, !1} +; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[MD_IS_VEC:![0-9]+]]} +; CHECK-NEXT: [[MD_IS_VEC:![0-9]+]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK-NEXT: [[LOOP3]] = distinct !{[[LOOP3]], [[MD_RT_UNROLL_DIS:![0-9]+]], [[MD_IS_VEC]]} +; CHECK-NEXT: [[MD_RT_UNROLL_DIS]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK-NEXT: [[LOOP4]] = distinct !{[[LOOP4]], [[MD_IS_VEC]]} +; CHECK-NEXT: [[LOOP5]] = distinct !{[[LOOP5]], 
[[MD_RT_UNROLL_DIS]], [[MD_IS_VEC]]} attributes #0 = { nounwind optsize uwtable "target-cpu"="core-avx2" "target-features"="+avx,+avx2" } diff --git a/llvm/test/Transforms/LoopVectorize/vectorize-once.ll b/llvm/test/Transforms/LoopVectorize/vectorize-once.ll --- a/llvm/test/Transforms/LoopVectorize/vectorize-once.ll +++ b/llvm/test/Transforms/LoopVectorize/vectorize-once.ll @@ -10,7 +10,9 @@ ; This test checks that we add metadata to vectorized loops ; CHECK-LABEL: @_Z4foo1Pii( ; CHECK: <4 x i32> -; CHECK: llvm.loop +; CHECK: br i1 %{{.*}}, label %{{.*}}, label %vector.body, !llvm.loop [[VEC_LOOP1:![0-9]+]] +; +; CHECK: br i1 %{{.*}}, label %{{.*}}, label %for.body.i, !llvm.loop [[SCALAR_LOOP1:![0-9]+]] ; CHECK: ret ; This test comes from the loop: @@ -42,7 +44,7 @@ ; This test checks that we don't vectorize loops that are marked with the "width" == 1 metadata. ; CHECK-LABEL: @_Z4foo2Pii( ; CHECK-NOT: <4 x i32> -; CHECK: llvm.loop +; CHECK: br i1 %{{.*}}, label %{{.*}}, label %for.body.i, !llvm.loop [[SCALAR_LOOP2:![0-9]+]] ; CHECK: ret define i32 @_Z4foo2Pii(i32* %A, i32 %n) #0 { entry: @@ -67,10 +69,12 @@ attributes #0 = { nounwind readonly ssp uwtable "fp-contract-model"="standard" "frame-pointer"="non-leaf" "realign-stack" "relocation-model"="pic" "ssp-buffers-size"="8" } -; CHECK: !0 = distinct !{!0, !1} -; CHECK: !1 = !{!"llvm.loop.isvectorized", i32 1} -; CHECK: !2 = distinct !{!2, !3, !1} -; CHECK: !3 = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[VEC_LOOP1]] = distinct !{[[VEC_LOOP1]], [[MD_IS_VEC:![0-9]+]]} +; CHECK-NEXT: [[MD_IS_VEC:![0-9]+]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK-NEXT: [[SCALAR_LOOP1]] = distinct !{[[SCALAR_LOOP1]], [[MD_RT_UNROLL_DIS:![0-9]+]], [[MD_IS_VEC]]} +; CHECK-NEXT: [[MD_RT_UNROLL_DIS]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK-NEXT: [[SCALAR_LOOP2]] = distinct !{[[SCALAR_LOOP2]], [[VEC_WIDTH_1:![0-9]+]]} +; CHECK-NEXT: [[VEC_WIDTH_1]] = !{!"llvm.loop.vectorize.width", i32 1} !0 = !{!0, !1} !1 = 
!{!"llvm.loop.vectorize.width", i32 1} diff --git a/llvm/test/Transforms/PhaseOrdering/X86/excessive-unrolling.ll b/llvm/test/Transforms/PhaseOrdering/X86/excessive-unrolling.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/X86/excessive-unrolling.ll @@ -0,0 +1,384 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes='default' -unroll-runtime -S %s | FileCheck %s + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx" + +@b = global [58 x double] zeroinitializer, align 16 +@c = global [58 x double] zeroinitializer, align 16 +@a = global [58 x double] zeroinitializer, align 16 + +; Test case for #42332, showing excessive unrolling of vector loop. +define void @test_known_trip_count() { +; CHECK-LABEL: @test_known_trip_count( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr @b, align 16 +; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <2 x double>, ptr @c, align 16 +; CHECK-NEXT: [[TMP0:%.*]] = fadd <2 x double> [[WIDE_LOAD]], [[WIDE_LOAD3]] +; CHECK-NEXT: store <2 x double> [[TMP0]], ptr @a, align 16 +; CHECK-NEXT: [[WIDE_LOAD_1:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 2), align 16 +; CHECK-NEXT: [[WIDE_LOAD3_1:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 2), align 16 +; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> [[WIDE_LOAD_1]], [[WIDE_LOAD3_1]] +; CHECK-NEXT: store <2 x double> [[TMP1]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 2), align 16 +; CHECK-NEXT: [[WIDE_LOAD_2:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 4), align 16 +; CHECK-NEXT: [[WIDE_LOAD3_2:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 4), align 16 +; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[WIDE_LOAD_2]], 
[[WIDE_LOAD3_2]] +; CHECK-NEXT: store <2 x double> [[TMP2]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 4), align 16 +; CHECK-NEXT: [[WIDE_LOAD_3:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 6), align 16 +; CHECK-NEXT: [[WIDE_LOAD3_3:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 6), align 16 +; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[WIDE_LOAD_3]], [[WIDE_LOAD3_3]] +; CHECK-NEXT: store <2 x double> [[TMP3]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 6), align 16 +; CHECK-NEXT: [[WIDE_LOAD_4:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 8), align 16 +; CHECK-NEXT: [[WIDE_LOAD3_4:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 8), align 16 +; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[WIDE_LOAD_4]], [[WIDE_LOAD3_4]] +; CHECK-NEXT: store <2 x double> [[TMP4]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 8), align 16 +; CHECK-NEXT: [[WIDE_LOAD_5:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 10), align 16 +; CHECK-NEXT: [[WIDE_LOAD3_5:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 10), align 16 +; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[WIDE_LOAD_5]], [[WIDE_LOAD3_5]] +; CHECK-NEXT: store <2 x double> [[TMP5]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 10), align 16 +; CHECK-NEXT: [[WIDE_LOAD_6:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 12), align 16 +; CHECK-NEXT: [[WIDE_LOAD3_6:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 12), align 16 +; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[WIDE_LOAD_6]], [[WIDE_LOAD3_6]] +; CHECK-NEXT: store <2 x double> [[TMP6]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 12), align 16 
+; CHECK-NEXT: [[WIDE_LOAD_7:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 14), align 16 +; CHECK-NEXT: [[WIDE_LOAD3_7:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 14), align 16 +; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[WIDE_LOAD_7]], [[WIDE_LOAD3_7]] +; CHECK-NEXT: store <2 x double> [[TMP7]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 14), align 16 +; CHECK-NEXT: [[WIDE_LOAD_8:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 16), align 16 +; CHECK-NEXT: [[WIDE_LOAD3_8:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 16), align 16 +; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[WIDE_LOAD_8]], [[WIDE_LOAD3_8]] +; CHECK-NEXT: store <2 x double> [[TMP8]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 16), align 16 +; CHECK-NEXT: [[WIDE_LOAD_9:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 18), align 16 +; CHECK-NEXT: [[WIDE_LOAD3_9:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 18), align 16 +; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[WIDE_LOAD_9]], [[WIDE_LOAD3_9]] +; CHECK-NEXT: store <2 x double> [[TMP9]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 18), align 16 +; CHECK-NEXT: [[WIDE_LOAD_10:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 20), align 16 +; CHECK-NEXT: [[WIDE_LOAD3_10:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 20), align 16 +; CHECK-NEXT: [[TMP10:%.*]] = fadd <2 x double> [[WIDE_LOAD_10]], [[WIDE_LOAD3_10]] +; CHECK-NEXT: store <2 x double> [[TMP10]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 20), align 16 +; CHECK-NEXT: [[WIDE_LOAD_11:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 22), 
align 16 +; CHECK-NEXT: [[WIDE_LOAD3_11:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 22), align 16 +; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[WIDE_LOAD_11]], [[WIDE_LOAD3_11]] +; CHECK-NEXT: store <2 x double> [[TMP11]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 22), align 16 +; CHECK-NEXT: [[WIDE_LOAD_12:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 24), align 16 +; CHECK-NEXT: [[WIDE_LOAD3_12:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 24), align 16 +; CHECK-NEXT: [[TMP12:%.*]] = fadd <2 x double> [[WIDE_LOAD_12]], [[WIDE_LOAD3_12]] +; CHECK-NEXT: store <2 x double> [[TMP12]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 24), align 16 +; CHECK-NEXT: [[WIDE_LOAD_13:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 26), align 16 +; CHECK-NEXT: [[WIDE_LOAD3_13:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 26), align 16 +; CHECK-NEXT: [[TMP13:%.*]] = fadd <2 x double> [[WIDE_LOAD_13]], [[WIDE_LOAD3_13]] +; CHECK-NEXT: store <2 x double> [[TMP13]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 26), align 16 +; CHECK-NEXT: [[WIDE_LOAD_14:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 28), align 16 +; CHECK-NEXT: [[WIDE_LOAD3_14:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 28), align 16 +; CHECK-NEXT: [[TMP14:%.*]] = fadd <2 x double> [[WIDE_LOAD_14]], [[WIDE_LOAD3_14]] +; CHECK-NEXT: store <2 x double> [[TMP14]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 28), align 16 +; CHECK-NEXT: [[WIDE_LOAD_15:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 30), align 16 +; CHECK-NEXT: [[WIDE_LOAD3_15:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x 
double], ptr @c, i64 0, i64 30), align 16 +; CHECK-NEXT: [[TMP15:%.*]] = fadd <2 x double> [[WIDE_LOAD_15]], [[WIDE_LOAD3_15]] +; CHECK-NEXT: store <2 x double> [[TMP15]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 30), align 16 +; CHECK-NEXT: [[WIDE_LOAD_16:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 32), align 16 +; CHECK-NEXT: [[WIDE_LOAD3_16:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 32), align 16 +; CHECK-NEXT: [[TMP16:%.*]] = fadd <2 x double> [[WIDE_LOAD_16]], [[WIDE_LOAD3_16]] +; CHECK-NEXT: store <2 x double> [[TMP16]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 32), align 16 +; CHECK-NEXT: [[WIDE_LOAD_17:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 34), align 16 +; CHECK-NEXT: [[WIDE_LOAD3_17:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 34), align 16 +; CHECK-NEXT: [[TMP17:%.*]] = fadd <2 x double> [[WIDE_LOAD_17]], [[WIDE_LOAD3_17]] +; CHECK-NEXT: store <2 x double> [[TMP17]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 34), align 16 +; CHECK-NEXT: [[WIDE_LOAD_18:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 36), align 16 +; CHECK-NEXT: [[WIDE_LOAD3_18:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 36), align 16 +; CHECK-NEXT: [[TMP18:%.*]] = fadd <2 x double> [[WIDE_LOAD_18]], [[WIDE_LOAD3_18]] +; CHECK-NEXT: store <2 x double> [[TMP18]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 36), align 16 +; CHECK-NEXT: [[WIDE_LOAD_19:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 38), align 16 +; CHECK-NEXT: [[WIDE_LOAD3_19:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 38), align 16 +; CHECK-NEXT: [[TMP19:%.*]] = fadd <2 x double> 
[[WIDE_LOAD_19]], [[WIDE_LOAD3_19]] +; CHECK-NEXT: store <2 x double> [[TMP19]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 38), align 16 +; CHECK-NEXT: [[WIDE_LOAD_20:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 40), align 16 +; CHECK-NEXT: [[WIDE_LOAD3_20:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 40), align 16 +; CHECK-NEXT: [[TMP20:%.*]] = fadd <2 x double> [[WIDE_LOAD_20]], [[WIDE_LOAD3_20]] +; CHECK-NEXT: store <2 x double> [[TMP20]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 40), align 16 +; CHECK-NEXT: [[WIDE_LOAD_21:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 42), align 16 +; CHECK-NEXT: [[WIDE_LOAD3_21:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 42), align 16 +; CHECK-NEXT: [[TMP21:%.*]] = fadd <2 x double> [[WIDE_LOAD_21]], [[WIDE_LOAD3_21]] +; CHECK-NEXT: store <2 x double> [[TMP21]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 42), align 16 +; CHECK-NEXT: [[WIDE_LOAD_22:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 44), align 16 +; CHECK-NEXT: [[WIDE_LOAD3_22:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 44), align 16 +; CHECK-NEXT: [[TMP22:%.*]] = fadd <2 x double> [[WIDE_LOAD_22]], [[WIDE_LOAD3_22]] +; CHECK-NEXT: store <2 x double> [[TMP22]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 44), align 16 +; CHECK-NEXT: [[WIDE_LOAD_23:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 46), align 16 +; CHECK-NEXT: [[WIDE_LOAD3_23:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 46), align 16 +; CHECK-NEXT: [[TMP23:%.*]] = fadd <2 x double> [[WIDE_LOAD_23]], [[WIDE_LOAD3_23]] +; CHECK-NEXT: store <2 x double> [[TMP23]], ptr getelementptr inbounds 
([58 x double], ptr @a, i64 0, i64 46), align 16 +; CHECK-NEXT: [[WIDE_LOAD_24:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 48), align 16 +; CHECK-NEXT: [[WIDE_LOAD3_24:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 48), align 16 +; CHECK-NEXT: [[TMP24:%.*]] = fadd <2 x double> [[WIDE_LOAD_24]], [[WIDE_LOAD3_24]] +; CHECK-NEXT: store <2 x double> [[TMP24]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 48), align 16 +; CHECK-NEXT: [[WIDE_LOAD_25:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 50), align 16 +; CHECK-NEXT: [[WIDE_LOAD3_25:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 50), align 16 +; CHECK-NEXT: [[TMP25:%.*]] = fadd <2 x double> [[WIDE_LOAD_25]], [[WIDE_LOAD3_25]] +; CHECK-NEXT: store <2 x double> [[TMP25]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 50), align 16 +; CHECK-NEXT: [[WIDE_LOAD_26:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 52), align 16 +; CHECK-NEXT: [[WIDE_LOAD3_26:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 52), align 16 +; CHECK-NEXT: [[TMP26:%.*]] = fadd <2 x double> [[WIDE_LOAD_26]], [[WIDE_LOAD3_26]] +; CHECK-NEXT: store <2 x double> [[TMP26]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 52), align 16 +; CHECK-NEXT: [[WIDE_LOAD_27:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 54), align 16 +; CHECK-NEXT: [[WIDE_LOAD3_27:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 54), align 16 +; CHECK-NEXT: [[TMP27:%.*]] = fadd <2 x double> [[WIDE_LOAD_27]], [[WIDE_LOAD3_27]] +; CHECK-NEXT: store <2 x double> [[TMP27]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 54), align 16 +; CHECK-NEXT: [[WIDE_LOAD_28:%.*]] = load <2 x double>, 
ptr getelementptr inbounds ([58 x double], ptr @b, i64 0, i64 56), align 16 +; CHECK-NEXT: [[WIDE_LOAD3_28:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 0, i64 56), align 16 +; CHECK-NEXT: [[TMP28:%.*]] = fadd <2 x double> [[WIDE_LOAD_28]], [[WIDE_LOAD3_28]] +; CHECK-NEXT: store <2 x double> [[TMP28]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 0, i64 56), align 16 +; CHECK-NEXT: [[WIDE_LOAD_29:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @b, i64 1, i64 0), align 16 +; CHECK-NEXT: [[WIDE_LOAD3_29:%.*]] = load <2 x double>, ptr getelementptr inbounds ([58 x double], ptr @c, i64 1, i64 0), align 16 +; CHECK-NEXT: [[TMP29:%.*]] = fadd <2 x double> [[WIDE_LOAD_29]], [[WIDE_LOAD3_29]] +; CHECK-NEXT: store <2 x double> [[TMP29]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 1, i64 0), align 16 +; CHECK-NEXT: [[TMP30:%.*]] = load double, ptr getelementptr inbounds ([58 x double], ptr @b, i64 1, i64 2), align 16 +; CHECK-NEXT: [[TMP31:%.*]] = load double, ptr getelementptr inbounds ([58 x double], ptr @c, i64 1, i64 2), align 16 +; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP30]], [[TMP31]] +; CHECK-NEXT: store double [[ADD]], ptr getelementptr inbounds ([58 x double], ptr @a, i64 1, i64 2), align 16 +; CHECK-NEXT: ret void +; +entry: + br label %for.cond + +for.cond: + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %cmp = icmp slt i32 %i.0, 61 + br i1 %cmp, label %for.body, label %exit + +for.body: + %idxprom = sext i32 %i.0 to i64 + %arrayidx = getelementptr inbounds [58 x double], ptr @b, i64 0, i64 %idxprom + %0 = load double, ptr %arrayidx, align 8 + %idxprom1 = sext i32 %i.0 to i64 + %arrayidx2 = getelementptr inbounds [58 x double], ptr @c, i64 0, i64 %idxprom1 + %1 = load double, ptr %arrayidx2, align 8 + %add = fadd double %0, %1 + %idxprom3 = sext i32 %i.0 to i64 + %arrayidx4 = getelementptr inbounds [58 x double], ptr @a, i64 0, i64 %idxprom3 + store double %add, ptr 
%arrayidx4, align 8 + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +exit: + ret void +} + + +define void @test_runtime_trip_count(i32 %N) { +; CHECK-LABEL: @test_runtime_trip_count( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[EXIT:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER7:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967292 +; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[N_VEC]], -4 +; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP2]], 7 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], 28 +; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK_UNR_LCSSA:%.*]], label [[VECTOR_PH_NEW:%.*]] +; CHECK: vector.ph.new: +; CHECK-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TMP2]], 9223372036854775800 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_7:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [58 x double], ptr @b, i64 0, i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP4]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[TMP4]], i64 2 +; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <2 x double>, ptr [[TMP5]], align 16 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [58 x double], ptr @c, i64 0, i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <2 x double>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = 
getelementptr inbounds double, ptr [[TMP6]], i64 2 +; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x double>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[WIDE_LOAD]], [[WIDE_LOAD5]] +; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[WIDE_LOAD4]], [[WIDE_LOAD6]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [58 x double], ptr @a, i64 0, i64 [[INDEX]] +; CHECK-NEXT: store <2 x double> [[TMP8]], ptr [[TMP10]], align 16 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, ptr [[TMP10]], i64 2 +; CHECK-NEXT: store <2 x double> [[TMP9]], ptr [[TMP11]], align 16 +; CHECK-NEXT: [[INDEX_NEXT:%.*]] = or i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [58 x double], ptr @b, i64 0, i64 [[INDEX_NEXT]] +; CHECK-NEXT: [[WIDE_LOAD_1:%.*]] = load <2 x double>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds double, ptr [[TMP12]], i64 2 +; CHECK-NEXT: [[WIDE_LOAD4_1:%.*]] = load <2 x double>, ptr [[TMP13]], align 16 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [58 x double], ptr @c, i64 0, i64 [[INDEX_NEXT]] +; CHECK-NEXT: [[WIDE_LOAD5_1:%.*]] = load <2 x double>, ptr [[TMP14]], align 16 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i64 2 +; CHECK-NEXT: [[WIDE_LOAD6_1:%.*]] = load <2 x double>, ptr [[TMP15]], align 16 +; CHECK-NEXT: [[TMP16:%.*]] = fadd <2 x double> [[WIDE_LOAD_1]], [[WIDE_LOAD5_1]] +; CHECK-NEXT: [[TMP17:%.*]] = fadd <2 x double> [[WIDE_LOAD4_1]], [[WIDE_LOAD6_1]] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [58 x double], ptr @a, i64 0, i64 [[INDEX_NEXT]] +; CHECK-NEXT: store <2 x double> [[TMP16]], ptr [[TMP18]], align 16 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[TMP18]], i64 2 +; CHECK-NEXT: store <2 x double> [[TMP17]], ptr [[TMP19]], align 16 +; CHECK-NEXT: [[INDEX_NEXT_1:%.*]] = or i64 [[INDEX]], 8 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [58 x double], ptr @b, i64 0, i64 
[[INDEX_NEXT_1]] +; CHECK-NEXT: [[WIDE_LOAD_2:%.*]] = load <2 x double>, ptr [[TMP20]], align 16 +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i64 2 +; CHECK-NEXT: [[WIDE_LOAD4_2:%.*]] = load <2 x double>, ptr [[TMP21]], align 16 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [58 x double], ptr @c, i64 0, i64 [[INDEX_NEXT_1]] +; CHECK-NEXT: [[WIDE_LOAD5_2:%.*]] = load <2 x double>, ptr [[TMP22]], align 16 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds double, ptr [[TMP22]], i64 2 +; CHECK-NEXT: [[WIDE_LOAD6_2:%.*]] = load <2 x double>, ptr [[TMP23]], align 16 +; CHECK-NEXT: [[TMP24:%.*]] = fadd <2 x double> [[WIDE_LOAD_2]], [[WIDE_LOAD5_2]] +; CHECK-NEXT: [[TMP25:%.*]] = fadd <2 x double> [[WIDE_LOAD4_2]], [[WIDE_LOAD6_2]] +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [58 x double], ptr @a, i64 0, i64 [[INDEX_NEXT_1]] +; CHECK-NEXT: store <2 x double> [[TMP24]], ptr [[TMP26]], align 16 +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 2 +; CHECK-NEXT: store <2 x double> [[TMP25]], ptr [[TMP27]], align 16 +; CHECK-NEXT: [[INDEX_NEXT_2:%.*]] = or i64 [[INDEX]], 12 +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [58 x double], ptr @b, i64 0, i64 [[INDEX_NEXT_2]] +; CHECK-NEXT: [[WIDE_LOAD_3:%.*]] = load <2 x double>, ptr [[TMP28]], align 16 +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds double, ptr [[TMP28]], i64 2 +; CHECK-NEXT: [[WIDE_LOAD4_3:%.*]] = load <2 x double>, ptr [[TMP29]], align 16 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [58 x double], ptr @c, i64 0, i64 [[INDEX_NEXT_2]] +; CHECK-NEXT: [[WIDE_LOAD5_3:%.*]] = load <2 x double>, ptr [[TMP30]], align 16 +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i64 2 +; CHECK-NEXT: [[WIDE_LOAD6_3:%.*]] = load <2 x double>, ptr [[TMP31]], align 16 +; CHECK-NEXT: [[TMP32:%.*]] = fadd <2 x double> [[WIDE_LOAD_3]], [[WIDE_LOAD5_3]] +; CHECK-NEXT: [[TMP33:%.*]] = fadd <2 x double> 
[[WIDE_LOAD4_3]], [[WIDE_LOAD6_3]] +; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds [58 x double], ptr @a, i64 0, i64 [[INDEX_NEXT_2]] +; CHECK-NEXT: store <2 x double> [[TMP32]], ptr [[TMP34]], align 16 +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds double, ptr [[TMP34]], i64 2 +; CHECK-NEXT: store <2 x double> [[TMP33]], ptr [[TMP35]], align 16 +; CHECK-NEXT: [[INDEX_NEXT_3:%.*]] = or i64 [[INDEX]], 16 +; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [58 x double], ptr @b, i64 0, i64 [[INDEX_NEXT_3]] +; CHECK-NEXT: [[WIDE_LOAD_4:%.*]] = load <2 x double>, ptr [[TMP36]], align 16 +; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds double, ptr [[TMP36]], i64 2 +; CHECK-NEXT: [[WIDE_LOAD4_4:%.*]] = load <2 x double>, ptr [[TMP37]], align 16 +; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [58 x double], ptr @c, i64 0, i64 [[INDEX_NEXT_3]] +; CHECK-NEXT: [[WIDE_LOAD5_4:%.*]] = load <2 x double>, ptr [[TMP38]], align 16 +; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds double, ptr [[TMP38]], i64 2 +; CHECK-NEXT: [[WIDE_LOAD6_4:%.*]] = load <2 x double>, ptr [[TMP39]], align 16 +; CHECK-NEXT: [[TMP40:%.*]] = fadd <2 x double> [[WIDE_LOAD_4]], [[WIDE_LOAD5_4]] +; CHECK-NEXT: [[TMP41:%.*]] = fadd <2 x double> [[WIDE_LOAD4_4]], [[WIDE_LOAD6_4]] +; CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [58 x double], ptr @a, i64 0, i64 [[INDEX_NEXT_3]] +; CHECK-NEXT: store <2 x double> [[TMP40]], ptr [[TMP42]], align 16 +; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds double, ptr [[TMP42]], i64 2 +; CHECK-NEXT: store <2 x double> [[TMP41]], ptr [[TMP43]], align 16 +; CHECK-NEXT: [[INDEX_NEXT_4:%.*]] = or i64 [[INDEX]], 20 +; CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds [58 x double], ptr @b, i64 0, i64 [[INDEX_NEXT_4]] +; CHECK-NEXT: [[WIDE_LOAD_5:%.*]] = load <2 x double>, ptr [[TMP44]], align 16 +; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds double, ptr [[TMP44]], i64 2 +; CHECK-NEXT: [[WIDE_LOAD4_5:%.*]] = load <2 x double>, ptr 
[[TMP45]], align 16 +; CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds [58 x double], ptr @c, i64 0, i64 [[INDEX_NEXT_4]] +; CHECK-NEXT: [[WIDE_LOAD5_5:%.*]] = load <2 x double>, ptr [[TMP46]], align 16 +; CHECK-NEXT: [[TMP47:%.*]] = getelementptr inbounds double, ptr [[TMP46]], i64 2 +; CHECK-NEXT: [[WIDE_LOAD6_5:%.*]] = load <2 x double>, ptr [[TMP47]], align 16 +; CHECK-NEXT: [[TMP48:%.*]] = fadd <2 x double> [[WIDE_LOAD_5]], [[WIDE_LOAD5_5]] +; CHECK-NEXT: [[TMP49:%.*]] = fadd <2 x double> [[WIDE_LOAD4_5]], [[WIDE_LOAD6_5]] +; CHECK-NEXT: [[TMP50:%.*]] = getelementptr inbounds [58 x double], ptr @a, i64 0, i64 [[INDEX_NEXT_4]] +; CHECK-NEXT: store <2 x double> [[TMP48]], ptr [[TMP50]], align 16 +; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds double, ptr [[TMP50]], i64 2 +; CHECK-NEXT: store <2 x double> [[TMP49]], ptr [[TMP51]], align 16 +; CHECK-NEXT: [[INDEX_NEXT_5:%.*]] = or i64 [[INDEX]], 24 +; CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds [58 x double], ptr @b, i64 0, i64 [[INDEX_NEXT_5]] +; CHECK-NEXT: [[WIDE_LOAD_6:%.*]] = load <2 x double>, ptr [[TMP52]], align 16 +; CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds double, ptr [[TMP52]], i64 2 +; CHECK-NEXT: [[WIDE_LOAD4_6:%.*]] = load <2 x double>, ptr [[TMP53]], align 16 +; CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds [58 x double], ptr @c, i64 0, i64 [[INDEX_NEXT_5]] +; CHECK-NEXT: [[WIDE_LOAD5_6:%.*]] = load <2 x double>, ptr [[TMP54]], align 16 +; CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds double, ptr [[TMP54]], i64 2 +; CHECK-NEXT: [[WIDE_LOAD6_6:%.*]] = load <2 x double>, ptr [[TMP55]], align 16 +; CHECK-NEXT: [[TMP56:%.*]] = fadd <2 x double> [[WIDE_LOAD_6]], [[WIDE_LOAD5_6]] +; CHECK-NEXT: [[TMP57:%.*]] = fadd <2 x double> [[WIDE_LOAD4_6]], [[WIDE_LOAD6_6]] +; CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds [58 x double], ptr @a, i64 0, i64 [[INDEX_NEXT_5]] +; CHECK-NEXT: store <2 x double> [[TMP56]], ptr [[TMP58]], align 16 +; CHECK-NEXT: [[TMP59:%.*]] = 
getelementptr inbounds double, ptr [[TMP58]], i64 2 +; CHECK-NEXT: store <2 x double> [[TMP57]], ptr [[TMP59]], align 16 +; CHECK-NEXT: [[INDEX_NEXT_6:%.*]] = or i64 [[INDEX]], 28 +; CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds [58 x double], ptr @b, i64 0, i64 [[INDEX_NEXT_6]] +; CHECK-NEXT: [[WIDE_LOAD_7:%.*]] = load <2 x double>, ptr [[TMP60]], align 16 +; CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds double, ptr [[TMP60]], i64 2 +; CHECK-NEXT: [[WIDE_LOAD4_7:%.*]] = load <2 x double>, ptr [[TMP61]], align 16 +; CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds [58 x double], ptr @c, i64 0, i64 [[INDEX_NEXT_6]] +; CHECK-NEXT: [[WIDE_LOAD5_7:%.*]] = load <2 x double>, ptr [[TMP62]], align 16 +; CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds double, ptr [[TMP62]], i64 2 +; CHECK-NEXT: [[WIDE_LOAD6_7:%.*]] = load <2 x double>, ptr [[TMP63]], align 16 +; CHECK-NEXT: [[TMP64:%.*]] = fadd <2 x double> [[WIDE_LOAD_7]], [[WIDE_LOAD5_7]] +; CHECK-NEXT: [[TMP65:%.*]] = fadd <2 x double> [[WIDE_LOAD4_7]], [[WIDE_LOAD6_7]] +; CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds [58 x double], ptr @a, i64 0, i64 [[INDEX_NEXT_6]] +; CHECK-NEXT: store <2 x double> [[TMP64]], ptr [[TMP66]], align 16 +; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds double, ptr [[TMP66]], i64 2 +; CHECK-NEXT: store <2 x double> [[TMP65]], ptr [[TMP67]], align 16 +; CHECK-NEXT: [[INDEX_NEXT_7]] = add nuw i64 [[INDEX]], 32 +; CHECK-NEXT: [[NITER_NEXT_7]] = add i64 [[NITER]], 8 +; CHECK-NEXT: [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]] +; CHECK-NEXT: br i1 [[NITER_NCMP_7]], label [[MIDDLE_BLOCK_UNR_LCSSA]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block.unr-lcssa: +; CHECK-NEXT: [[INDEX_UNR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT_7]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD_NOT]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY_EPIL:%.*]] +; 
CHECK: vector.body.epil: +; CHECK-NEXT: [[INDEX_EPIL:%.*]] = phi i64 [ [[INDEX_NEXT_EPIL:%.*]], [[VECTOR_BODY_EPIL]] ], [ [[INDEX_UNR]], [[MIDDLE_BLOCK_UNR_LCSSA]] ] +; CHECK-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_NEXT:%.*]], [[VECTOR_BODY_EPIL]] ], [ 0, [[MIDDLE_BLOCK_UNR_LCSSA]] ] +; CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds [58 x double], ptr @b, i64 0, i64 [[INDEX_EPIL]] +; CHECK-NEXT: [[WIDE_LOAD_EPIL:%.*]] = load <2 x double>, ptr [[TMP68]], align 16 +; CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds double, ptr [[TMP68]], i64 2 +; CHECK-NEXT: [[WIDE_LOAD4_EPIL:%.*]] = load <2 x double>, ptr [[TMP69]], align 16 +; CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds [58 x double], ptr @c, i64 0, i64 [[INDEX_EPIL]] +; CHECK-NEXT: [[WIDE_LOAD5_EPIL:%.*]] = load <2 x double>, ptr [[TMP70]], align 16 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds double, ptr [[TMP70]], i64 2 +; CHECK-NEXT: [[WIDE_LOAD6_EPIL:%.*]] = load <2 x double>, ptr [[TMP71]], align 16 +; CHECK-NEXT: [[TMP72:%.*]] = fadd <2 x double> [[WIDE_LOAD_EPIL]], [[WIDE_LOAD5_EPIL]] +; CHECK-NEXT: [[TMP73:%.*]] = fadd <2 x double> [[WIDE_LOAD4_EPIL]], [[WIDE_LOAD6_EPIL]] +; CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds [58 x double], ptr @a, i64 0, i64 [[INDEX_EPIL]] +; CHECK-NEXT: store <2 x double> [[TMP72]], ptr [[TMP74]], align 16 +; CHECK-NEXT: [[TMP75:%.*]] = getelementptr inbounds double, ptr [[TMP74]], i64 2 +; CHECK-NEXT: store <2 x double> [[TMP73]], ptr [[TMP75]], align 16 +; CHECK-NEXT: [[INDEX_NEXT_EPIL]] = add nuw i64 [[INDEX_EPIL]], 4 +; CHECK-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1 +; CHECK-NEXT: [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_NEXT]], [[XTRAITER]] +; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_NOT]], label [[MIDDLE_BLOCK]], label [[VECTOR_BODY_EPIL]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT]], label 
[[FOR_BODY_PREHEADER7]] +; CHECK: for.body.preheader7: +; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER7]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [58 x double], ptr @b, i64 0, i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP76:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [58 x double], ptr @c, i64 0, i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP77:%.*]] = load double, ptr [[ARRAYIDX2]], align 8 +; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP76]], [[TMP77]] +; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [58 x double], ptr @a, i64 0, i64 [[INDVARS_IV]] +; CHECK-NEXT: store double [[ADD]], ptr [[ARRAYIDX4]], align 8 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %for.cond + +for.cond: + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %cmp = icmp slt i32 %i.0, %N + br i1 %cmp, label %for.body, label %exit + +for.body: + %idxprom = sext i32 %i.0 to i64 + %arrayidx = getelementptr inbounds [58 x double], ptr @b, i64 0, i64 %idxprom + %0 = load double, ptr %arrayidx, align 8 + %idxprom1 = sext i32 %i.0 to i64 + %arrayidx2 = getelementptr inbounds [58 x double], ptr @c, i64 0, i64 %idxprom1 + %1 = load double, ptr %arrayidx2, align 8 + %add = fadd double %0, %1 + %idxprom3 = sext i32 %i.0 to i64 + %arrayidx4 = getelementptr inbounds [58 x double], ptr @a, i64 0, i64 %idxprom3 + store double %add, ptr %arrayidx4, align 8 + %inc = add nsw i32 %i.0, 1 + br label %for.cond + 
+exit: + ret void +} diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr38280.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr38280.ll --- a/llvm/test/Transforms/PhaseOrdering/X86/pr38280.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/pr38280.ll @@ -1,258 +1,49 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -O2 -S -mtriple=x86_64-- -mattr=+sse2 < %s | FileCheck %s --check-prefixes=SSE -; RUN: opt -O2 -S -mtriple=x86_64-- -mattr=+avx2 < %s | FileCheck %s --check-prefixes=AVX +; RUN: opt -O2 -S -mtriple=x86_64-- -mattr=+sse2 < %s | FileCheck %s +; RUN: opt -O2 -S -mtriple=x86_64-- -mattr=+avx2 < %s | FileCheck %s ; PR38280 / Issue #37628 define void @apply_delta(ptr nocapture noundef %dst, ptr nocapture noundef readonly %src, i64 noundef %neg_offs, i64 noundef %count) { -; SSE-LABEL: @apply_delta( -; SSE-NEXT: entry: -; SSE-NEXT: [[CMP21:%.*]] = icmp ugt i64 [[COUNT:%.*]], 7 -; SSE-NEXT: br i1 [[CMP21]], label [[WHILE_BODY:%.*]], label [[WHILE_COND3_PREHEADER:%.*]] -; SSE: while.cond3.preheader: -; SSE-NEXT: [[COUNT_ADDR_0_LCSSA:%.*]] = phi i64 [ [[COUNT]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[WHILE_BODY]] ] -; SSE-NEXT: [[SRC_ADDR_0_LCSSA:%.*]] = phi ptr [ [[SRC:%.*]], [[ENTRY]] ], [ [[ADD_PTR2:%.*]], [[WHILE_BODY]] ] -; SSE-NEXT: [[DST_ADDR_0_LCSSA:%.*]] = phi ptr [ [[DST:%.*]], [[ENTRY]] ], [ [[ADD_PTR1:%.*]], [[WHILE_BODY]] ] -; SSE-NEXT: [[DST_ADDR_0_LCSSA1:%.*]] = ptrtoint ptr [[DST_ADDR_0_LCSSA]] to i64 -; SSE-NEXT: [[SRC_ADDR_0_LCSSA2:%.*]] = ptrtoint ptr [[SRC_ADDR_0_LCSSA]] to i64 -; SSE-NEXT: [[TOBOOL_NOT27:%.*]] = icmp eq i64 [[COUNT_ADDR_0_LCSSA]], 0 -; SSE-NEXT: br i1 [[TOBOOL_NOT27]], label [[WHILE_END9:%.*]], label [[ITER_CHECK:%.*]] -; SSE: iter.check: -; SSE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[COUNT_ADDR_0_LCSSA]], 8 -; SSE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[WHILE_BODY4_PREHEADER:%.*]], label [[VECTOR_MEMCHECK:%.*]] -; SSE: vector.memcheck: -; SSE-NEXT: [[TMP0:%.*]] = sub i64 
[[DST_ADDR_0_LCSSA1]], [[SRC_ADDR_0_LCSSA2]] -; SSE-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32 -; SSE-NEXT: [[TMP1:%.*]] = sub i64 [[DST_ADDR_0_LCSSA1]], [[SRC_ADDR_0_LCSSA2]] -; SSE-NEXT: [[DIFF_CHECK3:%.*]] = icmp ult i64 [[TMP1]], 32 -; SSE-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK3]] -; SSE-NEXT: [[TMP2:%.*]] = add i64 [[NEG_OFFS:%.*]], 31 -; SSE-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP2]], 32 -; SSE-NEXT: [[CONFLICT_RDX5:%.*]] = or i1 [[CONFLICT_RDX]], [[DIFF_CHECK4]] -; SSE-NEXT: br i1 [[CONFLICT_RDX5]], label [[WHILE_BODY4_PREHEADER]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] -; SSE: vector.main.loop.iter.check: -; SSE-NEXT: [[MIN_ITERS_CHECK6:%.*]] = icmp ult i64 [[COUNT_ADDR_0_LCSSA]], 32 -; SSE-NEXT: br i1 [[MIN_ITERS_CHECK6]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] -; SSE: vector.ph: -; SSE-NEXT: [[N_VEC:%.*]] = and i64 [[COUNT_ADDR_0_LCSSA]], -32 -; SSE-NEXT: br label [[VECTOR_BODY:%.*]] -; SSE: vector.body: -; SSE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SSE-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST_ADDR_0_LCSSA]], i64 [[INDEX]] -; SSE-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[SRC_ADDR_0_LCSSA]], i64 [[INDEX]] -; SSE-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP8]], align 1 -; SSE-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[NEXT_GEP8]], i64 16 -; SSE-NEXT: [[WIDE_LOAD10:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 -; SSE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 [[NEG_OFFS]] -; SSE-NEXT: [[WIDE_LOAD11:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1 -; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 16 -; SSE-NEXT: [[WIDE_LOAD12:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 -; SSE-NEXT: [[TMP6:%.*]] = add <16 x i8> [[WIDE_LOAD11]], [[WIDE_LOAD]] -; SSE-NEXT: [[TMP7:%.*]] = add <16 x i8> [[WIDE_LOAD12]], [[WIDE_LOAD10]] -; SSE-NEXT: store <16 x i8> [[TMP6]], ptr 
[[NEXT_GEP]], align 1 -; SSE-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 16 -; SSE-NEXT: store <16 x i8> [[TMP7]], ptr [[TMP8]], align 1 -; SSE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 -; SSE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SSE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] -; SSE: middle.block: -; SSE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[COUNT_ADDR_0_LCSSA]], [[N_VEC]] -; SSE-NEXT: br i1 [[CMP_N]], label [[WHILE_END9]], label [[VEC_EPILOG_ITER_CHECK:%.*]] -; SSE: vec.epilog.iter.check: -; SSE-NEXT: [[IND_END20:%.*]] = and i64 [[COUNT_ADDR_0_LCSSA]], 31 -; SSE-NEXT: [[IND_END17:%.*]] = getelementptr i8, ptr [[SRC_ADDR_0_LCSSA]], i64 [[N_VEC]] -; SSE-NEXT: [[IND_END15:%.*]] = getelementptr i8, ptr [[DST_ADDR_0_LCSSA]], i64 [[N_VEC]] -; SSE-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[COUNT_ADDR_0_LCSSA]], 24 -; SSE-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0 -; SSE-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[WHILE_BODY4_PREHEADER]], label [[VEC_EPILOG_PH]] -; SSE: vec.epilog.ph: -; SSE-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; SSE-NEXT: [[N_VEC14:%.*]] = and i64 [[COUNT_ADDR_0_LCSSA]], -8 -; SSE-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST_ADDR_0_LCSSA]], i64 [[N_VEC14]] -; SSE-NEXT: [[IND_END16:%.*]] = getelementptr i8, ptr [[SRC_ADDR_0_LCSSA]], i64 [[N_VEC14]] -; SSE-NEXT: [[IND_END19:%.*]] = and i64 [[COUNT_ADDR_0_LCSSA]], 7 -; SSE-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] -; SSE: vec.epilog.vector.body: -; SSE-NEXT: [[INDEX23:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT28:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; SSE-NEXT: [[NEXT_GEP24:%.*]] = getelementptr i8, ptr [[DST_ADDR_0_LCSSA]], i64 [[INDEX23]] -; SSE-NEXT: [[NEXT_GEP25:%.*]] = getelementptr i8, ptr [[SRC_ADDR_0_LCSSA]], i64 [[INDEX23]] 
-; SSE-NEXT: [[WIDE_LOAD26:%.*]] = load <8 x i8>, ptr [[NEXT_GEP25]], align 1 -; SSE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP24]], i64 [[NEG_OFFS]] -; SSE-NEXT: [[WIDE_LOAD27:%.*]] = load <8 x i8>, ptr [[TMP10]], align 1 -; SSE-NEXT: [[TMP11:%.*]] = add <8 x i8> [[WIDE_LOAD27]], [[WIDE_LOAD26]] -; SSE-NEXT: store <8 x i8> [[TMP11]], ptr [[NEXT_GEP24]], align 1 -; SSE-NEXT: [[INDEX_NEXT28]] = add nuw i64 [[INDEX23]], 8 -; SSE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT28]], [[N_VEC14]] -; SSE-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] -; SSE: vec.epilog.middle.block: -; SSE-NEXT: [[CMP_N22:%.*]] = icmp eq i64 [[COUNT_ADDR_0_LCSSA]], [[N_VEC14]] -; SSE-NEXT: br i1 [[CMP_N22]], label [[WHILE_END9]], label [[WHILE_BODY4_PREHEADER]] -; SSE: while.body4.preheader: -; SSE-NEXT: [[DST_ADDR_130_PH:%.*]] = phi ptr [ [[DST_ADDR_0_LCSSA]], [[ITER_CHECK]] ], [ [[DST_ADDR_0_LCSSA]], [[VECTOR_MEMCHECK]] ], [ [[IND_END15]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; SSE-NEXT: [[SRC_ADDR_129_PH:%.*]] = phi ptr [ [[SRC_ADDR_0_LCSSA]], [[ITER_CHECK]] ], [ [[SRC_ADDR_0_LCSSA]], [[VECTOR_MEMCHECK]] ], [ [[IND_END17]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[IND_END16]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; SSE-NEXT: [[COUNT_ADDR_128_PH:%.*]] = phi i64 [ [[COUNT_ADDR_0_LCSSA]], [[ITER_CHECK]] ], [ [[COUNT_ADDR_0_LCSSA]], [[VECTOR_MEMCHECK]] ], [ [[IND_END20]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[IND_END19]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; SSE-NEXT: br label [[WHILE_BODY4:%.*]] -; SSE: while.body: -; SSE-NEXT: [[DST_ADDR_024:%.*]] = phi ptr [ [[ADD_PTR1]], [[WHILE_BODY]] ], [ [[DST]], [[ENTRY]] ] -; SSE-NEXT: [[SRC_ADDR_023:%.*]] = phi ptr [ [[ADD_PTR2]], [[WHILE_BODY]] ], [ [[SRC]], [[ENTRY]] ] -; SSE-NEXT: [[COUNT_ADDR_022:%.*]] = phi i64 [ [[SUB]], [[WHILE_BODY]] ], [ [[COUNT]], [[ENTRY]] ] -; SSE-NEXT: [[TMP13:%.*]] = load <8 x i8>, ptr [[SRC_ADDR_023]], 
align 1 -; SSE-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[DST_ADDR_024]], i64 [[NEG_OFFS]] -; SSE-NEXT: [[TMP14:%.*]] = load <8 x i8>, ptr [[ADD_PTR]], align 1 -; SSE-NEXT: [[ADD:%.*]] = add <8 x i8> [[TMP14]], [[TMP13]] -; SSE-NEXT: store <8 x i8> [[ADD]], ptr [[DST_ADDR_024]], align 1 -; SSE-NEXT: [[ADD_PTR1]] = getelementptr inbounds i8, ptr [[DST_ADDR_024]], i64 8 -; SSE-NEXT: [[ADD_PTR2]] = getelementptr inbounds i8, ptr [[SRC_ADDR_023]], i64 8 -; SSE-NEXT: [[SUB]] = add i64 [[COUNT_ADDR_022]], -8 -; SSE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[SUB]], 7 -; SSE-NEXT: br i1 [[CMP]], label [[WHILE_BODY]], label [[WHILE_COND3_PREHEADER]] -; SSE: while.body4: -; SSE-NEXT: [[DST_ADDR_130:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY4]] ], [ [[DST_ADDR_130_PH]], [[WHILE_BODY4_PREHEADER]] ] -; SSE-NEXT: [[SRC_ADDR_129:%.*]] = phi ptr [ [[INCDEC_PTR8:%.*]], [[WHILE_BODY4]] ], [ [[SRC_ADDR_129_PH]], [[WHILE_BODY4_PREHEADER]] ] -; SSE-NEXT: [[COUNT_ADDR_128:%.*]] = phi i64 [ [[DEC:%.*]], [[WHILE_BODY4]] ], [ [[COUNT_ADDR_128_PH]], [[WHILE_BODY4_PREHEADER]] ] -; SSE-NEXT: [[DEC]] = add i64 [[COUNT_ADDR_128]], -1 -; SSE-NEXT: [[TMP15:%.*]] = load i8, ptr [[SRC_ADDR_129]], align 1 -; SSE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[DST_ADDR_130]], i64 [[NEG_OFFS]] -; SSE-NEXT: [[TMP16:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; SSE-NEXT: [[ADD6:%.*]] = add i8 [[TMP16]], [[TMP15]] -; SSE-NEXT: store i8 [[ADD6]], ptr [[DST_ADDR_130]], align 1 -; SSE-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[DST_ADDR_130]], i64 1 -; SSE-NEXT: [[INCDEC_PTR8]] = getelementptr inbounds i8, ptr [[SRC_ADDR_129]], i64 1 -; SSE-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i64 [[DEC]], 0 -; SSE-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END9]], label [[WHILE_BODY4]], !llvm.loop [[LOOP4:![0-9]+]] -; SSE: while.end9: -; SSE-NEXT: ret void -; -; AVX-LABEL: @apply_delta( -; AVX-NEXT: entry: -; AVX-NEXT: [[CMP21:%.*]] = icmp ugt i64 [[COUNT:%.*]], 7 -; AVX-NEXT: br i1 
[[CMP21]], label [[WHILE_BODY:%.*]], label [[WHILE_COND3_PREHEADER:%.*]] -; AVX: while.cond3.preheader: -; AVX-NEXT: [[COUNT_ADDR_0_LCSSA:%.*]] = phi i64 [ [[COUNT]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[WHILE_BODY]] ] -; AVX-NEXT: [[SRC_ADDR_0_LCSSA:%.*]] = phi ptr [ [[SRC:%.*]], [[ENTRY]] ], [ [[ADD_PTR2:%.*]], [[WHILE_BODY]] ] -; AVX-NEXT: [[DST_ADDR_0_LCSSA:%.*]] = phi ptr [ [[DST:%.*]], [[ENTRY]] ], [ [[ADD_PTR1:%.*]], [[WHILE_BODY]] ] -; AVX-NEXT: [[DST_ADDR_0_LCSSA1:%.*]] = ptrtoint ptr [[DST_ADDR_0_LCSSA]] to i64 -; AVX-NEXT: [[SRC_ADDR_0_LCSSA2:%.*]] = ptrtoint ptr [[SRC_ADDR_0_LCSSA]] to i64 -; AVX-NEXT: [[TOBOOL_NOT27:%.*]] = icmp eq i64 [[COUNT_ADDR_0_LCSSA]], 0 -; AVX-NEXT: br i1 [[TOBOOL_NOT27]], label [[WHILE_END9:%.*]], label [[ITER_CHECK:%.*]] -; AVX: iter.check: -; AVX-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[COUNT_ADDR_0_LCSSA]], 16 -; AVX-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[WHILE_BODY4_PREHEADER:%.*]], label [[VECTOR_MEMCHECK:%.*]] -; AVX: vector.memcheck: -; AVX-NEXT: [[TMP0:%.*]] = sub i64 [[DST_ADDR_0_LCSSA1]], [[SRC_ADDR_0_LCSSA2]] -; AVX-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 128 -; AVX-NEXT: [[TMP1:%.*]] = sub i64 [[DST_ADDR_0_LCSSA1]], [[SRC_ADDR_0_LCSSA2]] -; AVX-NEXT: [[DIFF_CHECK3:%.*]] = icmp ult i64 [[TMP1]], 128 -; AVX-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK3]] -; AVX-NEXT: [[TMP2:%.*]] = add i64 [[NEG_OFFS:%.*]], 127 -; AVX-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP2]], 128 -; AVX-NEXT: [[CONFLICT_RDX5:%.*]] = or i1 [[CONFLICT_RDX]], [[DIFF_CHECK4]] -; AVX-NEXT: br i1 [[CONFLICT_RDX5]], label [[WHILE_BODY4_PREHEADER]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] -; AVX: vector.main.loop.iter.check: -; AVX-NEXT: [[MIN_ITERS_CHECK6:%.*]] = icmp ult i64 [[COUNT_ADDR_0_LCSSA]], 128 -; AVX-NEXT: br i1 [[MIN_ITERS_CHECK6]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] -; AVX: vector.ph: -; AVX-NEXT: [[N_VEC:%.*]] = and i64 [[COUNT_ADDR_0_LCSSA]], -128 -; AVX-NEXT: br label 
[[VECTOR_BODY:%.*]] -; AVX: vector.body: -; AVX-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; AVX-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST_ADDR_0_LCSSA]], i64 [[INDEX]] -; AVX-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i8, ptr [[SRC_ADDR_0_LCSSA]], i64 [[INDEX]] -; AVX-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[NEXT_GEP10]], align 1 -; AVX-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[NEXT_GEP10]], i64 32 -; AVX-NEXT: [[WIDE_LOAD14:%.*]] = load <32 x i8>, ptr [[TMP3]], align 1 -; AVX-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[NEXT_GEP10]], i64 64 -; AVX-NEXT: [[WIDE_LOAD15:%.*]] = load <32 x i8>, ptr [[TMP4]], align 1 -; AVX-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[NEXT_GEP10]], i64 96 -; AVX-NEXT: [[WIDE_LOAD16:%.*]] = load <32 x i8>, ptr [[TMP5]], align 1 -; AVX-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 [[NEG_OFFS]] -; AVX-NEXT: [[WIDE_LOAD17:%.*]] = load <32 x i8>, ptr [[TMP6]], align 1 -; AVX-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 32 -; AVX-NEXT: [[WIDE_LOAD18:%.*]] = load <32 x i8>, ptr [[TMP7]], align 1 -; AVX-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 64 -; AVX-NEXT: [[WIDE_LOAD19:%.*]] = load <32 x i8>, ptr [[TMP8]], align 1 -; AVX-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 96 -; AVX-NEXT: [[WIDE_LOAD20:%.*]] = load <32 x i8>, ptr [[TMP9]], align 1 -; AVX-NEXT: [[TMP10:%.*]] = add <32 x i8> [[WIDE_LOAD17]], [[WIDE_LOAD]] -; AVX-NEXT: [[TMP11:%.*]] = add <32 x i8> [[WIDE_LOAD18]], [[WIDE_LOAD14]] -; AVX-NEXT: [[TMP12:%.*]] = add <32 x i8> [[WIDE_LOAD19]], [[WIDE_LOAD15]] -; AVX-NEXT: [[TMP13:%.*]] = add <32 x i8> [[WIDE_LOAD20]], [[WIDE_LOAD16]] -; AVX-NEXT: store <32 x i8> [[TMP10]], ptr [[NEXT_GEP]], align 1 -; AVX-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 32 -; AVX-NEXT: store <32 x i8> [[TMP11]], ptr [[TMP14]], align 1 -; AVX-NEXT: [[TMP15:%.*]] = getelementptr i8, 
ptr [[NEXT_GEP]], i64 64 -; AVX-NEXT: store <32 x i8> [[TMP12]], ptr [[TMP15]], align 1 -; AVX-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 96 -; AVX-NEXT: store <32 x i8> [[TMP13]], ptr [[TMP16]], align 1 -; AVX-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 128 -; AVX-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; AVX-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] -; AVX: middle.block: -; AVX-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[COUNT_ADDR_0_LCSSA]], [[N_VEC]] -; AVX-NEXT: br i1 [[CMP_N]], label [[WHILE_END9]], label [[VEC_EPILOG_ITER_CHECK:%.*]] -; AVX: vec.epilog.iter.check: -; AVX-NEXT: [[IND_END28:%.*]] = and i64 [[COUNT_ADDR_0_LCSSA]], 127 -; AVX-NEXT: [[IND_END25:%.*]] = getelementptr i8, ptr [[SRC_ADDR_0_LCSSA]], i64 [[N_VEC]] -; AVX-NEXT: [[IND_END23:%.*]] = getelementptr i8, ptr [[DST_ADDR_0_LCSSA]], i64 [[N_VEC]] -; AVX-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[COUNT_ADDR_0_LCSSA]], 112 -; AVX-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0 -; AVX-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[WHILE_BODY4_PREHEADER]], label [[VEC_EPILOG_PH]] -; AVX: vec.epilog.ph: -; AVX-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; AVX-NEXT: [[N_VEC22:%.*]] = and i64 [[COUNT_ADDR_0_LCSSA]], -16 -; AVX-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST_ADDR_0_LCSSA]], i64 [[N_VEC22]] -; AVX-NEXT: [[IND_END24:%.*]] = getelementptr i8, ptr [[SRC_ADDR_0_LCSSA]], i64 [[N_VEC22]] -; AVX-NEXT: [[IND_END27:%.*]] = and i64 [[COUNT_ADDR_0_LCSSA]], 15 -; AVX-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] -; AVX: vec.epilog.vector.body: -; AVX-NEXT: [[INDEX31:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT36:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; AVX-NEXT: [[NEXT_GEP32:%.*]] = getelementptr i8, ptr [[DST_ADDR_0_LCSSA]], i64 [[INDEX31]] -; AVX-NEXT: 
[[NEXT_GEP33:%.*]] = getelementptr i8, ptr [[SRC_ADDR_0_LCSSA]], i64 [[INDEX31]] -; AVX-NEXT: [[WIDE_LOAD34:%.*]] = load <16 x i8>, ptr [[NEXT_GEP33]], align 1 -; AVX-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP32]], i64 [[NEG_OFFS]] -; AVX-NEXT: [[WIDE_LOAD35:%.*]] = load <16 x i8>, ptr [[TMP18]], align 1 -; AVX-NEXT: [[TMP19:%.*]] = add <16 x i8> [[WIDE_LOAD35]], [[WIDE_LOAD34]] -; AVX-NEXT: store <16 x i8> [[TMP19]], ptr [[NEXT_GEP32]], align 1 -; AVX-NEXT: [[INDEX_NEXT36]] = add nuw i64 [[INDEX31]], 16 -; AVX-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT36]], [[N_VEC22]] -; AVX-NEXT: br i1 [[TMP20]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] -; AVX: vec.epilog.middle.block: -; AVX-NEXT: [[CMP_N30:%.*]] = icmp eq i64 [[COUNT_ADDR_0_LCSSA]], [[N_VEC22]] -; AVX-NEXT: br i1 [[CMP_N30]], label [[WHILE_END9]], label [[WHILE_BODY4_PREHEADER]] -; AVX: while.body4.preheader: -; AVX-NEXT: [[DST_ADDR_130_PH:%.*]] = phi ptr [ [[DST_ADDR_0_LCSSA]], [[ITER_CHECK]] ], [ [[DST_ADDR_0_LCSSA]], [[VECTOR_MEMCHECK]] ], [ [[IND_END23]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; AVX-NEXT: [[SRC_ADDR_129_PH:%.*]] = phi ptr [ [[SRC_ADDR_0_LCSSA]], [[ITER_CHECK]] ], [ [[SRC_ADDR_0_LCSSA]], [[VECTOR_MEMCHECK]] ], [ [[IND_END25]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[IND_END24]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; AVX-NEXT: [[COUNT_ADDR_128_PH:%.*]] = phi i64 [ [[COUNT_ADDR_0_LCSSA]], [[ITER_CHECK]] ], [ [[COUNT_ADDR_0_LCSSA]], [[VECTOR_MEMCHECK]] ], [ [[IND_END28]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[IND_END27]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; AVX-NEXT: br label [[WHILE_BODY4:%.*]] -; AVX: while.body: -; AVX-NEXT: [[DST_ADDR_024:%.*]] = phi ptr [ [[ADD_PTR1]], [[WHILE_BODY]] ], [ [[DST]], [[ENTRY]] ] -; AVX-NEXT: [[SRC_ADDR_023:%.*]] = phi ptr [ [[ADD_PTR2]], [[WHILE_BODY]] ], [ [[SRC]], [[ENTRY]] ] -; AVX-NEXT: [[COUNT_ADDR_022:%.*]] = phi i64 [ [[SUB]], [[WHILE_BODY]] ], [ 
[[COUNT]], [[ENTRY]] ] -; AVX-NEXT: [[TMP21:%.*]] = load <8 x i8>, ptr [[SRC_ADDR_023]], align 1 -; AVX-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[DST_ADDR_024]], i64 [[NEG_OFFS]] -; AVX-NEXT: [[TMP22:%.*]] = load <8 x i8>, ptr [[ADD_PTR]], align 1 -; AVX-NEXT: [[ADD:%.*]] = add <8 x i8> [[TMP22]], [[TMP21]] -; AVX-NEXT: store <8 x i8> [[ADD]], ptr [[DST_ADDR_024]], align 1 -; AVX-NEXT: [[ADD_PTR1]] = getelementptr inbounds i8, ptr [[DST_ADDR_024]], i64 8 -; AVX-NEXT: [[ADD_PTR2]] = getelementptr inbounds i8, ptr [[SRC_ADDR_023]], i64 8 -; AVX-NEXT: [[SUB]] = add i64 [[COUNT_ADDR_022]], -8 -; AVX-NEXT: [[CMP:%.*]] = icmp ugt i64 [[SUB]], 7 -; AVX-NEXT: br i1 [[CMP]], label [[WHILE_BODY]], label [[WHILE_COND3_PREHEADER]] -; AVX: while.body4: -; AVX-NEXT: [[DST_ADDR_130:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY4]] ], [ [[DST_ADDR_130_PH]], [[WHILE_BODY4_PREHEADER]] ] -; AVX-NEXT: [[SRC_ADDR_129:%.*]] = phi ptr [ [[INCDEC_PTR8:%.*]], [[WHILE_BODY4]] ], [ [[SRC_ADDR_129_PH]], [[WHILE_BODY4_PREHEADER]] ] -; AVX-NEXT: [[COUNT_ADDR_128:%.*]] = phi i64 [ [[DEC:%.*]], [[WHILE_BODY4]] ], [ [[COUNT_ADDR_128_PH]], [[WHILE_BODY4_PREHEADER]] ] -; AVX-NEXT: [[DEC]] = add i64 [[COUNT_ADDR_128]], -1 -; AVX-NEXT: [[TMP23:%.*]] = load i8, ptr [[SRC_ADDR_129]], align 1 -; AVX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[DST_ADDR_130]], i64 [[NEG_OFFS]] -; AVX-NEXT: [[TMP24:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; AVX-NEXT: [[ADD6:%.*]] = add i8 [[TMP24]], [[TMP23]] -; AVX-NEXT: store i8 [[ADD6]], ptr [[DST_ADDR_130]], align 1 -; AVX-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[DST_ADDR_130]], i64 1 -; AVX-NEXT: [[INCDEC_PTR8]] = getelementptr inbounds i8, ptr [[SRC_ADDR_129]], i64 1 -; AVX-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i64 [[DEC]], 0 -; AVX-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END9]], label [[WHILE_BODY4]], !llvm.loop [[LOOP4:![0-9]+]] -; AVX: while.end9: -; AVX-NEXT: ret void +; CHECK-LABEL: @apply_delta( +; CHECK-NEXT: 
entry: +; CHECK-NEXT: [[CMP21:%.*]] = icmp ugt i64 [[COUNT:%.*]], 7 +; CHECK-NEXT: br i1 [[CMP21]], label [[WHILE_BODY:%.*]], label [[WHILE_COND3_PREHEADER:%.*]] +; CHECK: while.cond3.preheader: +; CHECK-NEXT: [[COUNT_ADDR_0_LCSSA:%.*]] = phi i64 [ [[COUNT]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[WHILE_BODY]] ] +; CHECK-NEXT: [[SRC_ADDR_0_LCSSA:%.*]] = phi ptr [ [[SRC:%.*]], [[ENTRY]] ], [ [[ADD_PTR2:%.*]], [[WHILE_BODY]] ] +; CHECK-NEXT: [[DST_ADDR_0_LCSSA:%.*]] = phi ptr [ [[DST:%.*]], [[ENTRY]] ], [ [[ADD_PTR1:%.*]], [[WHILE_BODY]] ] +; CHECK-NEXT: [[TOBOOL_NOT27:%.*]] = icmp eq i64 [[COUNT_ADDR_0_LCSSA]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT27]], label [[WHILE_END9:%.*]], label [[WHILE_BODY4:%.*]] +; CHECK: while.body: +; CHECK-NEXT: [[DST_ADDR_024:%.*]] = phi ptr [ [[ADD_PTR1]], [[WHILE_BODY]] ], [ [[DST]], [[ENTRY]] ] +; CHECK-NEXT: [[SRC_ADDR_023:%.*]] = phi ptr [ [[ADD_PTR2]], [[WHILE_BODY]] ], [ [[SRC]], [[ENTRY]] ] +; CHECK-NEXT: [[COUNT_ADDR_022:%.*]] = phi i64 [ [[SUB]], [[WHILE_BODY]] ], [ [[COUNT]], [[ENTRY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[SRC_ADDR_023]], align 1 +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[DST_ADDR_024]], i64 [[NEG_OFFS:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[ADD_PTR]], align 1 +; CHECK-NEXT: [[ADD:%.*]] = add <8 x i8> [[TMP1]], [[TMP0]] +; CHECK-NEXT: store <8 x i8> [[ADD]], ptr [[DST_ADDR_024]], align 1 +; CHECK-NEXT: [[ADD_PTR1]] = getelementptr inbounds i8, ptr [[DST_ADDR_024]], i64 8 +; CHECK-NEXT: [[ADD_PTR2]] = getelementptr inbounds i8, ptr [[SRC_ADDR_023]], i64 8 +; CHECK-NEXT: [[SUB]] = add i64 [[COUNT_ADDR_022]], -8 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[SUB]], 7 +; CHECK-NEXT: br i1 [[CMP]], label [[WHILE_BODY]], label [[WHILE_COND3_PREHEADER]] +; CHECK: while.body4: +; CHECK-NEXT: [[DST_ADDR_130:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY4]] ], [ [[DST_ADDR_0_LCSSA]], [[WHILE_COND3_PREHEADER]] ] +; CHECK-NEXT: [[SRC_ADDR_129:%.*]] = phi ptr [ 
[[INCDEC_PTR8:%.*]], [[WHILE_BODY4]] ], [ [[SRC_ADDR_0_LCSSA]], [[WHILE_COND3_PREHEADER]] ] +; CHECK-NEXT: [[COUNT_ADDR_128:%.*]] = phi i64 [ [[DEC:%.*]], [[WHILE_BODY4]] ], [ [[COUNT_ADDR_0_LCSSA]], [[WHILE_COND3_PREHEADER]] ] +; CHECK-NEXT: [[DEC]] = add i64 [[COUNT_ADDR_128]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[SRC_ADDR_129]], align 1 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[DST_ADDR_130]], i64 [[NEG_OFFS]] +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ADD6:%.*]] = add i8 [[TMP3]], [[TMP2]] +; CHECK-NEXT: store i8 [[ADD6]], ptr [[DST_ADDR_130]], align 1 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[DST_ADDR_130]], i64 1 +; CHECK-NEXT: [[INCDEC_PTR8]] = getelementptr inbounds i8, ptr [[SRC_ADDR_129]], i64 1 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i64 [[DEC]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END9]], label [[WHILE_BODY4]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: while.end9: +; CHECK-NEXT: ret void ; entry: %cmp21 = icmp ugt i64 %count, 7 diff --git a/llvm/test/Transforms/RewriteStatepointsForGC/vector-nonlive-clobber.ll b/llvm/test/Transforms/RewriteStatepointsForGC/vector-nonlive-clobber.ll new file mode 100755 --- /dev/null +++ b/llvm/test/Transforms/RewriteStatepointsForGC/vector-nonlive-clobber.ll @@ -0,0 +1,44 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -passes=rewrite-statepoints-for-gc -rs4gc-clobber-non-live %s | FileCheck %s +; Make sure that clobber-non-live correctly handles vector types + +define void @test_vector_clobber(i8 addrspace(1)* %ptr) gc "statepoint-example" { +; CHECK-LABEL: @test_vector_clobber( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[PTR_CAST:%.*]] = bitcast i8 addrspace(1)* [[PTR:%.*]] to float addrspace(1)* +; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, void ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* elementtype(void ()) @foo, i32 0, i32 0, i32 0, i32 0) [ "deopt"(i32 0, i32 2, i32 0, i32 62, i32 0, i32 13, i32 0, i32 7, i8* null, i32 7, i8* null, i32 7, i8* null, i32 3, i32 14, i32 3, i32 -2406, i32 3, i32 28963, i32 3, i32 30401, i32 3, i32 -11, i32 3, i32 -5, i32 3, i32 1, i32 0, i8 addrspace(1)* [[PTR]], i32 0, i8 addrspace(1)* [[PTR]], i32 7, i8* null), "gc-live"(i8 addrspace(1)* [[PTR]]) ] +; CHECK-NEXT: [[PTR_RELOCATED:%.*]] = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[STATEPOINT_TOKEN]], i32 0, i32 0) +; CHECK-NEXT: [[PTR_CAST_REMAT:%.*]] = bitcast i8 addrspace(1)* [[PTR_RELOCATED]] to float addrspace(1)* +; CHECK-NEXT: [[CAST:%.*]] = bitcast i8 addrspace(1)* [[PTR_RELOCATED]] to float addrspace(1)* +; CHECK-NEXT: [[DOTSPLATINSERT_BASE:%.*]] = insertelement <8 x float addrspace(1)*> zeroinitializer, float addrspace(1)* [[CAST]], i32 0, !is_base_value !0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x float addrspace(1)*> poison, float addrspace(1)* [[PTR_CAST_REMAT]], i32 0 +; CHECK-NEXT: [[DOTSPLAT_BASE:%.*]] = shufflevector <8 x float addrspace(1)*> [[DOTSPLATINSERT_BASE]], <8 x float addrspace(1)*> undef, <8 x i32> zeroinitializer, !is_base_value !0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x float addrspace(1)*> [[DOTSPLATINSERT]], <8 x float addrspace(1)*> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds float, <8 x float addrspace(1)*> [[DOTSPLAT]], <8 x i64> +; CHECK-NEXT: [[STATEPOINT_TOKEN1:%.*]] = call token (i64, i32, void ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* elementtype(void ()) @bar, i32 0, i32 0, i32 0, i32 0) [ "deopt"(i32 0, i32 1, i32 0, i32 112, i32 0, i32 13, i32 0, i32 7, i8* null, i32 7, i8* null, i32 3, i32 undef, i32 3, i32 14, i32 3, i32 -2406, i32 3, i32 28963, i32 3, i32 30401, i32 3, i32 -11, i32 3, i32 -5, i32 3, i32 1, i32 0, i8 addrspace(1)* [[PTR_RELOCATED]], i32 0, i8 addrspace(1)* [[PTR_RELOCATED]], i32 7, i8* null), "gc-live"(<8 x float addrspace(1)*> [[GEP]], i8 addrspace(1)* [[PTR_RELOCATED]], <8 x float addrspace(1)*> [[DOTSPLAT_BASE]]) ] +; CHECK-NEXT: [[GEP_RELOCATED:%.*]] = call coldcc <8 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v8p1i8(token [[STATEPOINT_TOKEN1]], i32 2, i32 0) +; CHECK-NEXT: [[GEP_RELOCATED_CASTED:%.*]] = bitcast <8 x i8 addrspace(1)*> [[GEP_RELOCATED]] to <8 x float addrspace(1)*> +; CHECK-NEXT: [[PTR_RELOCATED2:%.*]] = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[STATEPOINT_TOKEN1]], i32 1, i32 1) +; CHECK-NEXT: [[DOTSPLAT_BASE_RELOCATED:%.*]] = call coldcc <8 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v8p1i8(token [[STATEPOINT_TOKEN1]], i32 2, i32 2) +; CHECK-NEXT: [[DOTSPLAT_BASE_RELOCATED_CASTED:%.*]] = bitcast <8 x i8 addrspace(1)*> [[DOTSPLAT_BASE_RELOCATED]] to <8 x float addrspace(1)*> +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p1f32(<8 x float addrspace(1)*> [[GEP_RELOCATED_CASTED]], i32 4, <8 x i1> , <8 x float> undef) +; CHECK-NEXT: unreachable +; +entry: + %ptr.cast = bitcast i8 addrspace(1)* %ptr to float addrspace(1)* + call void @foo() [ "deopt"(i32 0, i32 2, i32 0, i32 62, i32 0, i32 13, i32 0, i32 7, i8* null, i32 7, i8* null, i32 7, i8* null, i32 3, i32 14, i32 3, i32 -2406, i32 3, i32 28963, i32 3, i32 30401, i32 3, i32 -11, i32 3, i32 -5, i32 3, i32 1, i32 0, i8 addrspace(1)* %ptr, i32 0, i8 addrspace(1)* %ptr, i32 7, i8* null) ] + %gep = getelementptr inbounds float, float addrspace(1)* %ptr.cast, <8 
x i64> + call void @bar() [ "deopt"(i32 0, i32 1, i32 0, i32 112, i32 0, i32 13, i32 0, i32 7, i8* null, i32 7, i8* null, i32 3, i32 undef, i32 3, i32 14, i32 3, i32 -2406, i32 3, i32 28963, i32 3, i32 30401, i32 3, i32 -11, i32 3, i32 -5, i32 3, i32 1, i32 0, i8 addrspace(1)* %ptr, i32 0, i8 addrspace(1)* %ptr, i32 7, i8* null) ] + %res = call <8 x float> @llvm.masked.gather.v8f32.v8p1f32(<8 x float addrspace(1)*> %gep, i32 4, <8 x i1> , <8 x float> undef) + unreachable +} + + +declare void @foo() gc "statepoint-example" + +; Function Attrs: nocallback nofree nosync nounwind readonly willreturn +declare <8 x float> @llvm.masked.gather.v8f32.v8p1f32(<8 x float addrspace(1)*>, i32 immarg, <8 x i1>, <8 x float>) #1 + +declare void @bar() + +attributes #1 = { nocallback nofree nosync nounwind readonly willreturn } diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll @@ -37,13 +37,13 @@ define float @f_used_out_of_tree(<2 x float> %x) { ; CHECK-LABEL: @f_used_out_of_tree( -; CHECK-NEXT: [[X0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 -; CHECK-NEXT: [[X1:%.*]] = extractelement <2 x float> [[X]], i32 1 -; CHECK-NEXT: [[X0X0:%.*]] = fmul float [[X0]], [[X0]] -; CHECK-NEXT: [[X1X1:%.*]] = fmul float [[X1]], [[X1]] -; CHECK-NEXT: [[ADD:%.*]] = fadd float [[X0X0]], [[X1X1]] +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[X]], [[X]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 +; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]] ; CHECK-NEXT: store float [[ADD]], float* @a, align 4 -; CHECK-NEXT: ret float [[X0]] +; CHECK-NEXT: ret float [[TMP1]] ; ; THRESH1-LABEL: @f_used_out_of_tree( ; 
THRESH1-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -slp-vectorizer -S < %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx | FileCheck %s --check-prefixes=ALL,CHECK -; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx | FileCheck %s --check-prefixes=ALL,STORE +; RUN: opt -slp-vectorizer -S < %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx | FileCheck %s +; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx | FileCheck %s ; #include ; @@ -16,32 +16,32 @@ ; } define i32 @add_red(float* %A, i32 %n) { -; ALL-LABEL: @add_red( -; ALL-NEXT: entry: -; ALL-NEXT: [[CMP31:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; ALL-NEXT: br i1 [[CMP31]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] -; ALL: for.body.lr.ph: -; ALL-NEXT: [[TMP0:%.*]] = sext i32 [[N]] to i64 -; ALL-NEXT: br label [[FOR_BODY:%.*]] -; ALL: for.body: -; ALL-NEXT: [[I_033:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; ALL-NEXT: [[SUM_032:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[ADD17:%.*]], [[FOR_BODY]] ] -; ALL-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_033]], 2 -; ALL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]] -; ALL-NEXT: [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>* -; ALL-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 -; ALL-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], -; ALL-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]]) -; 
ALL-NEXT: [[ADD17]] = fadd fast float [[SUM_032]], [[TMP4]] -; ALL-NEXT: [[INC]] = add nsw i64 [[I_033]], 1 -; ALL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP0]] -; ALL-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]] -; ALL: for.cond.for.end_crit_edge: -; ALL-NEXT: [[PHITMP:%.*]] = fptosi float [[ADD17]] to i32 -; ALL-NEXT: br label [[FOR_END]] -; ALL: for.end: -; ALL-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ] -; ALL-NEXT: ret i32 [[SUM_0_LCSSA]] +; CHECK-LABEL: @add_red( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP31:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP31]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.lr.ph: +; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[N]] to i64 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_033:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SUM_032:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[ADD17:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_033]], 2 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>* +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]]) +; CHECK-NEXT: [[ADD17]] = fadd fast float [[SUM_032]], [[TMP4]] +; CHECK-NEXT: [[INC]] = add nsw i64 [[I_033]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP0]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]] +; CHECK: for.cond.for.end_crit_edge: +; CHECK-NEXT: [[PHITMP:%.*]] = fptosi float [[ADD17]] to i32 +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: 
for.end: +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] ; entry: %cmp31 = icmp sgt i32 %n, 0 @@ -99,34 +99,34 @@ ; } define i32 @mul_red(float* noalias %A, float* noalias %B, i32 %n) { -; ALL-LABEL: @mul_red( -; ALL-NEXT: entry: -; ALL-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; ALL-NEXT: br i1 [[CMP38]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] -; ALL: for.body.lr.ph: -; ALL-NEXT: [[TMP0:%.*]] = bitcast float* [[B:%.*]] to <4 x float>* -; ALL-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 -; ALL-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64 -; ALL-NEXT: br label [[FOR_BODY:%.*]] -; ALL: for.body: -; ALL-NEXT: [[I_040:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; ALL-NEXT: [[SUM_039:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[MUL21:%.*]], [[FOR_BODY]] ] -; ALL-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_040]], 2 -; ALL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]] -; ALL-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>* -; ALL-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 -; ALL-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[TMP1]], [[TMP4]] -; ALL-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]]) -; ALL-NEXT: [[MUL21]] = fmul float [[SUM_039]], [[TMP6]] -; ALL-NEXT: [[INC]] = add nsw i64 [[I_040]], 1 -; ALL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]] -; ALL-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]] -; ALL: for.cond.for.end_crit_edge: -; ALL-NEXT: [[PHITMP:%.*]] = fptosi float [[MUL21]] to i32 -; ALL-NEXT: br label [[FOR_END]] -; ALL: for.end: -; ALL-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ] -; ALL-NEXT: ret i32 [[SUM_0_LCSSA]] +; 
CHECK-LABEL: @mul_red( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP38]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.lr.ph: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[B:%.*]] to <4 x float>* +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_040:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SUM_039:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[MUL21:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_040]], 2 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>* +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[TMP1]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]]) +; CHECK-NEXT: [[MUL21]] = fmul float [[SUM_039]], [[TMP6]] +; CHECK-NEXT: [[INC]] = add nsw i64 [[I_040]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]] +; CHECK: for.cond.for.end_crit_edge: +; CHECK-NEXT: [[PHITMP:%.*]] = fptosi float [[MUL21]] to i32 +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] ; entry: %cmp38 = icmp sgt i32 %n, 0 @@ -196,41 +196,41 @@ ; } define i32 @long_red(float* noalias %A, float* noalias %B, i32 %n) { -; ALL-LABEL: @long_red( -; ALL-NEXT: entry: -; ALL-NEXT: [[CMP81:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; ALL-NEXT: br 
i1 [[CMP81]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] -; ALL: for.body.lr.ph: -; ALL-NEXT: [[TMP0:%.*]] = bitcast float* [[B:%.*]] to <8 x float>* -; ALL-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 -; ALL-NEXT: [[ARRAYIDX45:%.*]] = getelementptr inbounds float, float* [[B]], i64 8 -; ALL-NEXT: [[TMP2:%.*]] = load float, float* [[ARRAYIDX45]], align 4 -; ALL-NEXT: [[TMP3:%.*]] = sext i32 [[N]] to i64 -; ALL-NEXT: br label [[FOR_BODY:%.*]] -; ALL: for.body: -; ALL-NEXT: [[I_083:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; ALL-NEXT: [[SUM_082:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[ADD51:%.*]], [[FOR_BODY]] ] -; ALL-NEXT: [[MUL:%.*]] = mul nsw i64 [[I_083]], 6 -; ALL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]] -; ALL-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX2]] to <8 x float>* -; ALL-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4 -; ALL-NEXT: [[TMP6:%.*]] = fmul fast <8 x float> [[TMP1]], [[TMP5]] -; ALL-NEXT: [[ADD47:%.*]] = add nsw i64 [[MUL]], 8 -; ALL-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD47]] -; ALL-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX48]], align 4 -; ALL-NEXT: [[MUL49:%.*]] = fmul fast float [[TMP2]], [[TMP7]] -; ALL-NEXT: [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP6]]) -; ALL-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP8]], [[MUL49]] -; ALL-NEXT: [[ADD51]] = fadd fast float [[SUM_082]], [[OP_RDX]] -; ALL-NEXT: [[INC]] = add nsw i64 [[I_083]], 1 -; ALL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP3]] -; ALL-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]] -; ALL: for.cond.for.end_crit_edge: -; ALL-NEXT: [[PHITMP:%.*]] = fptosi float [[ADD51]] to i32 -; ALL-NEXT: br label [[FOR_END]] -; ALL: for.end: -; ALL-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 
[[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ] -; ALL-NEXT: ret i32 [[SUM_0_LCSSA]] +; CHECK-LABEL: @long_red( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP81:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP81]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.lr.ph: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[B:%.*]] to <8 x float>* +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 +; CHECK-NEXT: [[ARRAYIDX45:%.*]] = getelementptr inbounds float, float* [[B]], i64 8 +; CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[ARRAYIDX45]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = sext i32 [[N]] to i64 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_083:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SUM_082:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[ADD51:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[I_083]], 6 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX2]] to <8 x float>* +; CHECK-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = fmul fast <8 x float> [[TMP1]], [[TMP5]] +; CHECK-NEXT: [[ADD47:%.*]] = add nsw i64 [[MUL]], 8 +; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD47]] +; CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX48]], align 4 +; CHECK-NEXT: [[MUL49:%.*]] = fmul fast float [[TMP2]], [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP6]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP8]], [[MUL49]] +; CHECK-NEXT: [[ADD51]] = fadd fast float [[SUM_082]], [[OP_RDX]] +; CHECK-NEXT: [[INC]] = add nsw i64 [[I_083]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP3]] +; CHECK-NEXT: br 
i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]] +; CHECK: for.cond.for.end_crit_edge: +; CHECK-NEXT: [[PHITMP:%.*]] = fptosi float [[ADD51]] to i32 +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] ; entry: %cmp81 = icmp sgt i32 %n, 0 @@ -330,34 +330,34 @@ ; } define i32 @chain_red(float* noalias %A, float* noalias %B, i32 %n) { -; ALL-LABEL: @chain_red( -; ALL-NEXT: entry: -; ALL-NEXT: [[CMP41:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; ALL-NEXT: br i1 [[CMP41]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] -; ALL: for.body.lr.ph: -; ALL-NEXT: [[TMP0:%.*]] = bitcast float* [[B:%.*]] to <4 x float>* -; ALL-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 -; ALL-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64 -; ALL-NEXT: br label [[FOR_BODY:%.*]] -; ALL: for.body: -; ALL-NEXT: [[I_043:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; ALL-NEXT: [[SUM_042:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[OP_RDX:%.*]], [[FOR_BODY]] ] -; ALL-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_043]], 2 -; ALL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]] -; ALL-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>* -; ALL-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 -; ALL-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP4]] -; ALL-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]]) -; ALL-NEXT: [[OP_RDX]] = fadd fast float [[TMP6]], [[SUM_042]] -; ALL-NEXT: [[INC]] = add nsw i64 [[I_043]], 1 -; ALL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]] -; ALL-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]] -; ALL: for.cond.for.end_crit_edge: -; ALL-NEXT: [[PHITMP:%.*]] 
= fptosi float [[OP_RDX]] to i32 -; ALL-NEXT: br label [[FOR_END]] -; ALL: for.end: -; ALL-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ] -; ALL-NEXT: ret i32 [[SUM_0_LCSSA]] +; CHECK-LABEL: @chain_red( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP41:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP41]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.lr.ph: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[B:%.*]] to <4 x float>* +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_043:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SUM_042:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[OP_RDX:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_043]], 2 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>* +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]]) +; CHECK-NEXT: [[OP_RDX]] = fadd fast float [[TMP6]], [[SUM_042]] +; CHECK-NEXT: [[INC]] = add nsw i64 [[I_043]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]] +; CHECK: for.cond.for.end_crit_edge: +; CHECK-NEXT: [[PHITMP:%.*]] = fptosi float [[OP_RDX]] to i32 +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32 
[[SUM_0_LCSSA]] ; entry: %cmp41 = icmp sgt i32 %n, 0 @@ -437,65 +437,65 @@ ; } define void @foo(float* nocapture readonly %arg_A, i32 %arg_B, float* nocapture %array) { -; ALL-LABEL: @foo( -; ALL-NEXT: entry: -; ALL-NEXT: [[CMP1495:%.*]] = icmp eq i32 [[ARG_B:%.*]], 0 -; ALL-NEXT: br label [[FOR_BODY:%.*]] -; ALL: for.cond.cleanup: -; ALL-NEXT: ret void -; ALL: for.body: -; ALL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_COND_CLEANUP15:%.*]] ] -; ALL-NEXT: [[TMP0:%.*]] = shl i64 [[INDVARS_IV]], 2 -; ALL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[ARRAY:%.*]], i64 [[TMP0]] -; ALL-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX]], align 4 -; ALL-NEXT: [[TMP2:%.*]] = or i64 [[TMP0]], 1 -; ALL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP2]] -; ALL-NEXT: [[TMP3:%.*]] = load float, float* [[ARRAYIDX4]], align 4 -; ALL-NEXT: [[TMP4:%.*]] = or i64 [[TMP0]], 2 -; ALL-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP4]] -; ALL-NEXT: [[TMP5:%.*]] = load float, float* [[ARRAYIDX8]], align 4 -; ALL-NEXT: [[TMP6:%.*]] = or i64 [[TMP0]], 3 -; ALL-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP6]] -; ALL-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX12]], align 4 -; ALL-NEXT: br i1 [[CMP1495]], label [[FOR_COND_CLEANUP15]], label [[FOR_BODY16_LR_PH:%.*]] -; ALL: for.body16.lr.ph: -; ALL-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, float* [[ARG_A:%.*]], i64 [[INDVARS_IV]] -; ALL-NEXT: [[TMP8:%.*]] = load float, float* [[ADD_PTR]], align 4 -; ALL-NEXT: br label [[FOR_BODY16:%.*]] -; ALL: for.cond.cleanup15: -; ALL-NEXT: [[W2_0_LCSSA:%.*]] = phi float [ [[TMP5]], [[FOR_BODY]] ], [ [[SUB28:%.*]], [[FOR_BODY16]] ] -; ALL-NEXT: [[W3_0_LCSSA:%.*]] = phi float [ [[TMP7]], [[FOR_BODY]] ], [ [[W2_096:%.*]], [[FOR_BODY16]] ] -; ALL-NEXT: [[W1_0_LCSSA:%.*]] = phi float [ [[TMP3]], [[FOR_BODY]] ], [ 
[[W0_0100:%.*]], [[FOR_BODY16]] ] -; ALL-NEXT: [[W0_0_LCSSA:%.*]] = phi float [ [[TMP1]], [[FOR_BODY]] ], [ [[SUB19:%.*]], [[FOR_BODY16]] ] -; ALL-NEXT: store float [[W0_0_LCSSA]], float* [[ARRAYIDX]], align 4 -; ALL-NEXT: store float [[W1_0_LCSSA]], float* [[ARRAYIDX4]], align 4 -; ALL-NEXT: store float [[W2_0_LCSSA]], float* [[ARRAYIDX8]], align 4 -; ALL-NEXT: store float [[W3_0_LCSSA]], float* [[ARRAYIDX12]], align 4 -; ALL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; ALL-NEXT: [[EXITCOND109:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 6 -; ALL-NEXT: br i1 [[EXITCOND109]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] -; ALL: for.body16: -; ALL-NEXT: [[W0_0100]] = phi float [ [[TMP1]], [[FOR_BODY16_LR_PH]] ], [ [[SUB19]], [[FOR_BODY16]] ] -; ALL-NEXT: [[W1_099:%.*]] = phi float [ [[TMP3]], [[FOR_BODY16_LR_PH]] ], [ [[W0_0100]], [[FOR_BODY16]] ] -; ALL-NEXT: [[J_098:%.*]] = phi i32 [ 0, [[FOR_BODY16_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY16]] ] -; ALL-NEXT: [[W3_097:%.*]] = phi float [ [[TMP7]], [[FOR_BODY16_LR_PH]] ], [ [[W2_096]], [[FOR_BODY16]] ] -; ALL-NEXT: [[W2_096]] = phi float [ [[TMP5]], [[FOR_BODY16_LR_PH]] ], [ [[SUB28]], [[FOR_BODY16]] ] -; ALL-NEXT: [[MUL17:%.*]] = fmul fast float [[W0_0100]], 0x3FF19999A0000000 -; ALL-NEXT: [[MUL18_NEG:%.*]] = fmul fast float [[W1_099]], 0xBFF3333340000000 -; ALL-NEXT: [[SUB92:%.*]] = fadd fast float [[MUL17]], [[MUL18_NEG]] -; ALL-NEXT: [[SUB19]] = fadd fast float [[SUB92]], [[TMP8]] -; ALL-NEXT: [[MUL20:%.*]] = fmul fast float [[SUB19]], 0x4000CCCCC0000000 -; ALL-NEXT: [[MUL21_NEG:%.*]] = fmul fast float [[W0_0100]], 0xC0019999A0000000 -; ALL-NEXT: [[MUL23:%.*]] = fmul fast float [[W1_099]], 0x4002666660000000 -; ALL-NEXT: [[MUL25:%.*]] = fmul fast float [[W2_096]], 0x4008CCCCC0000000 -; ALL-NEXT: [[MUL27_NEG:%.*]] = fmul fast float [[W3_097]], 0xC0099999A0000000 -; ALL-NEXT: [[ADD2293:%.*]] = fadd fast float [[MUL27_NEG]], [[MUL25]] -; ALL-NEXT: [[ADD24:%.*]] = fadd fast float [[ADD2293]], 
[[MUL23]] -; ALL-NEXT: [[SUB2694:%.*]] = fadd fast float [[ADD24]], [[MUL21_NEG]] -; ALL-NEXT: [[SUB28]] = fadd fast float [[SUB2694]], [[MUL20]] -; ALL-NEXT: [[INC]] = add nuw i32 [[J_098]], 1 -; ALL-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[ARG_B]] -; ALL-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP15]], label [[FOR_BODY16]] +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP1495:%.*]] = icmp eq i32 [[ARG_B:%.*]], 0 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_COND_CLEANUP15:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[INDVARS_IV]], 2 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[ARRAY:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[TMP0]], 1 +; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[ARRAYIDX4]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP0]], 2 +; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = load float, float* [[ARRAYIDX8]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP0]], 3 +; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX12]], align 4 +; CHECK-NEXT: br i1 [[CMP1495]], label [[FOR_COND_CLEANUP15]], label [[FOR_BODY16_LR_PH:%.*]] +; CHECK: for.body16.lr.ph: +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, float* [[ARG_A:%.*]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP8:%.*]] = load float, float* [[ADD_PTR]], align 4 +; CHECK-NEXT: br label [[FOR_BODY16:%.*]] +; CHECK: for.cond.cleanup15: +; CHECK-NEXT: [[W2_0_LCSSA:%.*]] = phi float [ [[TMP5]], 
[[FOR_BODY]] ], [ [[SUB28:%.*]], [[FOR_BODY16]] ] +; CHECK-NEXT: [[W3_0_LCSSA:%.*]] = phi float [ [[TMP7]], [[FOR_BODY]] ], [ [[W2_096:%.*]], [[FOR_BODY16]] ] +; CHECK-NEXT: [[W1_0_LCSSA:%.*]] = phi float [ [[TMP3]], [[FOR_BODY]] ], [ [[W0_0100:%.*]], [[FOR_BODY16]] ] +; CHECK-NEXT: [[W0_0_LCSSA:%.*]] = phi float [ [[TMP1]], [[FOR_BODY]] ], [ [[SUB19:%.*]], [[FOR_BODY16]] ] +; CHECK-NEXT: store float [[W0_0_LCSSA]], float* [[ARRAYIDX]], align 4 +; CHECK-NEXT: store float [[W1_0_LCSSA]], float* [[ARRAYIDX4]], align 4 +; CHECK-NEXT: store float [[W2_0_LCSSA]], float* [[ARRAYIDX8]], align 4 +; CHECK-NEXT: store float [[W3_0_LCSSA]], float* [[ARRAYIDX12]], align 4 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND109:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 6 +; CHECK-NEXT: br i1 [[EXITCOND109]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] +; CHECK: for.body16: +; CHECK-NEXT: [[W0_0100]] = phi float [ [[TMP1]], [[FOR_BODY16_LR_PH]] ], [ [[SUB19]], [[FOR_BODY16]] ] +; CHECK-NEXT: [[W1_099:%.*]] = phi float [ [[TMP3]], [[FOR_BODY16_LR_PH]] ], [ [[W0_0100]], [[FOR_BODY16]] ] +; CHECK-NEXT: [[J_098:%.*]] = phi i32 [ 0, [[FOR_BODY16_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY16]] ] +; CHECK-NEXT: [[W3_097:%.*]] = phi float [ [[TMP7]], [[FOR_BODY16_LR_PH]] ], [ [[W2_096]], [[FOR_BODY16]] ] +; CHECK-NEXT: [[W2_096]] = phi float [ [[TMP5]], [[FOR_BODY16_LR_PH]] ], [ [[SUB28]], [[FOR_BODY16]] ] +; CHECK-NEXT: [[MUL17:%.*]] = fmul fast float [[W0_0100]], 0x3FF19999A0000000 +; CHECK-NEXT: [[MUL18_NEG:%.*]] = fmul fast float [[W1_099]], 0xBFF3333340000000 +; CHECK-NEXT: [[SUB92:%.*]] = fadd fast float [[MUL17]], [[MUL18_NEG]] +; CHECK-NEXT: [[SUB19]] = fadd fast float [[SUB92]], [[TMP8]] +; CHECK-NEXT: [[MUL20:%.*]] = fmul fast float [[SUB19]], 0x4000CCCCC0000000 +; CHECK-NEXT: [[MUL21_NEG:%.*]] = fmul fast float [[W0_0100]], 0xC0019999A0000000 +; CHECK-NEXT: [[MUL23:%.*]] = fmul fast float [[W1_099]], 0x4002666660000000 +; CHECK-NEXT: 
[[MUL25:%.*]] = fmul fast float [[W2_096]], 0x4008CCCCC0000000 +; CHECK-NEXT: [[MUL27_NEG:%.*]] = fmul fast float [[W3_097]], 0xC0099999A0000000 +; CHECK-NEXT: [[ADD2293:%.*]] = fadd fast float [[MUL27_NEG]], [[MUL25]] +; CHECK-NEXT: [[ADD24:%.*]] = fadd fast float [[ADD2293]], [[MUL23]] +; CHECK-NEXT: [[SUB2694:%.*]] = fadd fast float [[ADD24]], [[MUL21_NEG]] +; CHECK-NEXT: [[SUB28]] = fadd fast float [[SUB2694]], [[MUL20]] +; CHECK-NEXT: [[INC]] = add nuw i32 [[J_098]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[ARG_B]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP15]], label [[FOR_BODY16]] ; entry: %cmp1495 = icmp eq i32 %arg_B, 0 @@ -576,22 +576,20 @@ ; CHECK-NEXT: [[CMP17:%.*]] = icmp sgt i32 [[N:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP17]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] ; CHECK: for.body.lr.ph: -; CHECK-NEXT: [[TMP0:%.*]] = load double, double* [[B:%.*]], align 8 -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds double, double* [[B]], i64 1 -; CHECK-NEXT: [[TMP1:%.*]] = load double, double* [[ARRAYIDX4]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[B:%.*]] to <2 x double>* +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_018:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_018]], 2 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[MUL]] -; CHECK-NEXT: [[TMP3:%.*]] = load double, double* [[ARRAYIDX2]], align 8 -; CHECK-NEXT: [[MUL3:%.*]] = fmul fast double [[TMP0]], [[TMP3]] -; CHECK-NEXT: [[ADD16:%.*]] = or i64 [[MUL]], 1 -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[ADD16]] -; CHECK-NEXT: [[TMP4:%.*]] = load double, double* [[ARRAYIDX6]], align 8 -; CHECK-NEXT: [[MUL7:%.*]] = fmul fast 
double [[TMP1]], [[TMP4]] -; CHECK-NEXT: [[ADD8:%.*]] = fadd fast double [[MUL3]], [[MUL7]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[ARRAYIDX2]] to <2 x double>* +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <2 x double> [[TMP1]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i32 1 +; CHECK-NEXT: [[ADD8:%.*]] = fadd fast double [[TMP6]], [[TMP7]] ; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds double, double* [[C:%.*]], i64 [[I_018]] ; CHECK-NEXT: store double [[ADD8]], double* [[ARRAYIDX9]], align 8 ; CHECK-NEXT: [[INC]] = add nsw i64 [[I_018]], 1 @@ -600,33 +598,6 @@ ; CHECK: for.end: ; CHECK-NEXT: ret void ; -; STORE-LABEL: @store_red_double( -; STORE-NEXT: entry: -; STORE-NEXT: [[CMP17:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; STORE-NEXT: br i1 [[CMP17]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] -; STORE: for.body.lr.ph: -; STORE-NEXT: [[TMP0:%.*]] = bitcast double* [[B:%.*]] to <2 x double>* -; STORE-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 -; STORE-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64 -; STORE-NEXT: br label [[FOR_BODY:%.*]] -; STORE: for.body: -; STORE-NEXT: [[I_018:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; STORE-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_018]], 2 -; STORE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[MUL]] -; STORE-NEXT: [[TMP3:%.*]] = bitcast double* [[ARRAYIDX2]] to <2 x double>* -; STORE-NEXT: [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 8 -; STORE-NEXT: [[TMP5:%.*]] = fmul fast <2 x double> [[TMP1]], [[TMP4]] -; STORE-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0 -; STORE-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i32 1 -; STORE-NEXT: [[ADD8:%.*]] = fadd fast double [[TMP6]], [[TMP7]] -; 
STORE-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds double, double* [[C:%.*]], i64 [[I_018]] -; STORE-NEXT: store double [[ADD8]], double* [[ARRAYIDX9]], align 8 -; STORE-NEXT: [[INC]] = add nsw i64 [[I_018]], 1 -; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]] -; STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]] -; STORE: for.end: -; STORE-NEXT: ret void -; entry: %cmp17 = icmp sgt i32 %n, 0 br i1 %cmp17, label %for.body.lr.ph, label %for.end @@ -676,38 +647,20 @@ ; CHECK-NEXT: [[CMP37:%.*]] = icmp sgt i32 [[N:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP37]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] ; CHECK: for.body.lr.ph: -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[B]], i64 2 -; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds float, float* [[B]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[N]] to i64 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_039:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[C_ADDR_038:%.*]] = phi float* [ [[C:%.*]], [[FOR_BODY_LR_PH]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[B]], align 4 ; CHECK-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_039]], 2 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]] -; CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[MUL3:%.*]] = fmul fast float [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[ARRAYIDX4]], align 4 -; CHECK-NEXT: [[ADD34:%.*]] = or i64 [[MUL]], 1 -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD34]] -; CHECK-NEXT: [[TMP4:%.*]] = load float, float* [[ARRAYIDX6]], align 4 -; CHECK-NEXT: [[MUL7:%.*]] = fmul fast float [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[ADD8:%.*]] = fadd fast float 
[[MUL3]], [[MUL7]] -; CHECK-NEXT: [[TMP5:%.*]] = load float, float* [[ARRAYIDX9]], align 4 -; CHECK-NEXT: [[ADD1135:%.*]] = or i64 [[MUL]], 2 -; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1135]] -; CHECK-NEXT: [[TMP6:%.*]] = load float, float* [[ARRAYIDX12]], align 4 -; CHECK-NEXT: [[MUL13:%.*]] = fmul fast float [[TMP5]], [[TMP6]] -; CHECK-NEXT: [[ADD14:%.*]] = fadd fast float [[ADD8]], [[MUL13]] -; CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX15]], align 4 -; CHECK-NEXT: [[ADD1736:%.*]] = or i64 [[MUL]], 3 -; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1736]] -; CHECK-NEXT: [[TMP8:%.*]] = load float, float* [[ARRAYIDX18]], align 4 -; CHECK-NEXT: [[MUL19:%.*]] = fmul fast float [[TMP7]], [[TMP8]] -; CHECK-NEXT: [[ADD20:%.*]] = fadd fast float [[ADD14]], [[MUL19]] -; CHECK-NEXT: store float [[ADD20]], float* [[C_ADDR_038]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[B:%.*]] to <4 x float>* +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>* +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]]) +; CHECK-NEXT: store float [[TMP6]], float* [[C_ADDR_038]], align 4 ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds float, float* [[C_ADDR_038]], i64 1 ; CHECK-NEXT: [[INC]] = add nsw i64 [[I_039]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP0]] @@ -715,32 +668,6 @@ ; CHECK: for.end: ; CHECK-NEXT: ret i32 0 ; -; STORE-LABEL: @store_red( -; STORE-NEXT: entry: -; STORE-NEXT: [[CMP37:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; STORE-NEXT: br i1 [[CMP37]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] -; STORE: for.body.lr.ph: -; 
STORE-NEXT: [[TMP0:%.*]] = sext i32 [[N]] to i64 -; STORE-NEXT: br label [[FOR_BODY:%.*]] -; STORE: for.body: -; STORE-NEXT: [[I_039:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; STORE-NEXT: [[C_ADDR_038:%.*]] = phi float* [ [[C:%.*]], [[FOR_BODY_LR_PH]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ] -; STORE-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_039]], 2 -; STORE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]] -; STORE-NEXT: [[TMP1:%.*]] = bitcast float* [[B:%.*]] to <4 x float>* -; STORE-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 -; STORE-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>* -; STORE-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 -; STORE-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP4]] -; STORE-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]]) -; STORE-NEXT: store float [[TMP6]], float* [[C_ADDR_038]], align 4 -; STORE-NEXT: [[INCDEC_PTR]] = getelementptr inbounds float, float* [[C_ADDR_038]], i64 1 -; STORE-NEXT: [[INC]] = add nsw i64 [[I_039]], 1 -; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP0]] -; STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]] -; STORE: for.end: -; STORE-NEXT: ret i32 0 -; entry: %cmp37 = icmp sgt i32 %n, 0 br i1 %cmp37, label %for.body.lr.ph, label %for.end @@ -794,23 +721,11 @@ define void @float_red_example4(float* %res) { ; CHECK-LABEL: @float_red_example4( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 0), align 16 -; CHECK-NEXT: [[TMP1:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 1), align 4 -; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP1]], [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = load float, float* getelementptr inbounds ([32 x float], 
[32 x float]* @arr_float, i64 0, i64 2), align 8 -; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP2]], [[ADD]] -; CHECK-NEXT: [[TMP3:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 3), align 4 -; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float [[TMP3]], [[ADD_1]] -; CHECK-NEXT: store float [[ADD_2]], float* [[RES:%.*]], align 16 +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast ([32 x float]* @arr_float to <4 x float>*), align 16 +; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP0]]) +; CHECK-NEXT: store float [[TMP1]], float* [[RES:%.*]], align 16 ; CHECK-NEXT: ret void ; -; STORE-LABEL: @float_red_example4( -; STORE-NEXT: entry: -; STORE-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast ([32 x float]* @arr_float to <4 x float>*), align 16 -; STORE-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP0]]) -; STORE-NEXT: store float [[TMP1]], float* [[RES:%.*]], align 16 -; STORE-NEXT: ret void -; entry: %0 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 0), align 16 %1 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 1), align 4 @@ -826,31 +741,11 @@ define void @float_red_example8(float* %res) { ; CHECK-LABEL: @float_red_example8( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 0), align 16 -; CHECK-NEXT: [[TMP1:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 1), align 4 -; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP1]], [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 2), align 8 -; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP2]], 
[[ADD]] -; CHECK-NEXT: [[TMP3:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 3), align 4 -; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float [[TMP3]], [[ADD_1]] -; CHECK-NEXT: [[TMP4:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 4), align 16 -; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float [[TMP4]], [[ADD_2]] -; CHECK-NEXT: [[TMP5:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 5), align 4 -; CHECK-NEXT: [[ADD_4:%.*]] = fadd fast float [[TMP5]], [[ADD_3]] -; CHECK-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 6), align 8 -; CHECK-NEXT: [[ADD_5:%.*]] = fadd fast float [[TMP6]], [[ADD_4]] -; CHECK-NEXT: [[TMP7:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 7), align 4 -; CHECK-NEXT: [[ADD_6:%.*]] = fadd fast float [[TMP7]], [[ADD_5]] -; CHECK-NEXT: store float [[ADD_6]], float* [[RES:%.*]], align 16 +; CHECK-NEXT: [[TMP0:%.*]] = load <8 x float>, <8 x float>* bitcast ([32 x float]* @arr_float to <8 x float>*), align 16 +; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP0]]) +; CHECK-NEXT: store float [[TMP1]], float* [[RES:%.*]], align 16 ; CHECK-NEXT: ret void ; -; STORE-LABEL: @float_red_example8( -; STORE-NEXT: entry: -; STORE-NEXT: [[TMP0:%.*]] = load <8 x float>, <8 x float>* bitcast ([32 x float]* @arr_float to <8 x float>*), align 16 -; STORE-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP0]]) -; STORE-NEXT: store float [[TMP1]], float* [[RES:%.*]], align 16 -; STORE-NEXT: ret void -; entry: %0 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 0), align 16 %1 = load float, float* getelementptr inbounds ([32 x float], 
[32 x float]* @arr_float, i64 0, i64 1), align 4 @@ -874,47 +769,11 @@ define void @float_red_example16(float* %res) { ; CHECK-LABEL: @float_red_example16( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 0), align 16 -; CHECK-NEXT: [[TMP1:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 1), align 4 -; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP1]], [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 2), align 8 -; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP2]], [[ADD]] -; CHECK-NEXT: [[TMP3:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 3), align 4 -; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float [[TMP3]], [[ADD_1]] -; CHECK-NEXT: [[TMP4:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 4), align 16 -; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float [[TMP4]], [[ADD_2]] -; CHECK-NEXT: [[TMP5:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 5), align 4 -; CHECK-NEXT: [[ADD_4:%.*]] = fadd fast float [[TMP5]], [[ADD_3]] -; CHECK-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 6), align 8 -; CHECK-NEXT: [[ADD_5:%.*]] = fadd fast float [[TMP6]], [[ADD_4]] -; CHECK-NEXT: [[TMP7:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 7), align 4 -; CHECK-NEXT: [[ADD_6:%.*]] = fadd fast float [[TMP7]], [[ADD_5]] -; CHECK-NEXT: [[TMP8:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 8), align 16 -; CHECK-NEXT: [[ADD_7:%.*]] = fadd fast float [[TMP8]], [[ADD_6]] -; CHECK-NEXT: [[TMP9:%.*]] = load float, float* getelementptr 
inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 9), align 4 -; CHECK-NEXT: [[ADD_8:%.*]] = fadd fast float [[TMP9]], [[ADD_7]] -; CHECK-NEXT: [[TMP10:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 10), align 8 -; CHECK-NEXT: [[ADD_9:%.*]] = fadd fast float [[TMP10]], [[ADD_8]] -; CHECK-NEXT: [[TMP11:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 11), align 4 -; CHECK-NEXT: [[ADD_10:%.*]] = fadd fast float [[TMP11]], [[ADD_9]] -; CHECK-NEXT: [[TMP12:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 12), align 16 -; CHECK-NEXT: [[ADD_11:%.*]] = fadd fast float [[TMP12]], [[ADD_10]] -; CHECK-NEXT: [[TMP13:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 13), align 4 -; CHECK-NEXT: [[ADD_12:%.*]] = fadd fast float [[TMP13]], [[ADD_11]] -; CHECK-NEXT: [[TMP14:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 14), align 8 -; CHECK-NEXT: [[ADD_13:%.*]] = fadd fast float [[TMP14]], [[ADD_12]] -; CHECK-NEXT: [[TMP15:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 15), align 4 -; CHECK-NEXT: [[ADD_14:%.*]] = fadd fast float [[TMP15]], [[ADD_13]] -; CHECK-NEXT: store float [[ADD_14]], float* [[RES:%.*]], align 16 +; CHECK-NEXT: [[TMP0:%.*]] = load <16 x float>, <16 x float>* bitcast ([32 x float]* @arr_float to <16 x float>*), align 16 +; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP0]]) +; CHECK-NEXT: store float [[TMP1]], float* [[RES:%.*]], align 16 ; CHECK-NEXT: ret void ; -; STORE-LABEL: @float_red_example16( -; STORE-NEXT: entry: -; STORE-NEXT: [[TMP0:%.*]] = load <16 x float>, <16 x float>* bitcast ([32 x float]* @arr_float to <16 x float>*), align 16 -; STORE-NEXT: 
[[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP0]]) -; STORE-NEXT: store float [[TMP1]], float* [[RES:%.*]], align 16 -; STORE-NEXT: ret void -; entry: %0 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 0), align 16 %1 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 1), align 4 @@ -954,23 +813,11 @@ define void @i32_red_example4(i32* %res) { ; CHECK-LABEL: @i32_red_example4( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16 -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8 -; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP2]], [[ADD]] -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4 -; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP3]], [[ADD_1]] -; CHECK-NEXT: store i32 [[ADD_2]], i32* [[RES:%.*]], align 16 +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([32 x i32]* @arr_i32 to <4 x i32>*), align 16 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP0]]) +; CHECK-NEXT: store i32 [[TMP1]], i32* [[RES:%.*]], align 16 ; CHECK-NEXT: ret void ; -; STORE-LABEL: @i32_red_example4( -; STORE-NEXT: entry: -; STORE-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([32 x i32]* @arr_i32 to <4 x i32>*), align 16 -; STORE-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP0]]) -; STORE-NEXT: store i32 [[TMP1]], i32* [[RES:%.*]], align 16 -; STORE-NEXT: ret void -; entry: %0 = load i32, i32* getelementptr inbounds ([32 x i32], 
[32 x i32]* @arr_i32, i64 0, i64 0), align 16 %1 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4 @@ -986,31 +833,11 @@ define void @i32_red_example8(i32* %res) { ; CHECK-LABEL: @i32_red_example8( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16 -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8 -; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP2]], [[ADD]] -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4 -; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP3]], [[ADD_1]] -; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 4), align 16 -; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP4]], [[ADD_2]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 5), align 4 -; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP5]], [[ADD_3]] -; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 6), align 8 -; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP6]], [[ADD_4]] -; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 7), align 4 -; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP7]], [[ADD_5]] -; CHECK-NEXT: store i32 [[ADD_6]], i32* [[RES:%.*]], align 16 +; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP0]]) +; CHECK-NEXT: store 
i32 [[TMP1]], i32* [[RES:%.*]], align 16 ; CHECK-NEXT: ret void ; -; STORE-LABEL: @i32_red_example8( -; STORE-NEXT: entry: -; STORE-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16 -; STORE-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP0]]) -; STORE-NEXT: store i32 [[TMP1]], i32* [[RES:%.*]], align 16 -; STORE-NEXT: ret void -; entry: %0 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16 %1 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4 @@ -1034,47 +861,11 @@ define void @i32_red_example16(i32* %res) { ; CHECK-LABEL: @i32_red_example16( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16 -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8 -; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP2]], [[ADD]] -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4 -; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP3]], [[ADD_1]] -; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 4), align 16 -; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP4]], [[ADD_2]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 5), align 4 -; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP5]], [[ADD_3]] -; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 6), align 8 -; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP6]], 
[[ADD_4]] -; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 7), align 4 -; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP7]], [[ADD_5]] -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 8), align 16 -; CHECK-NEXT: [[ADD_7:%.*]] = add nsw i32 [[TMP8]], [[ADD_6]] -; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 9), align 4 -; CHECK-NEXT: [[ADD_8:%.*]] = add nsw i32 [[TMP9]], [[ADD_7]] -; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 10), align 8 -; CHECK-NEXT: [[ADD_9:%.*]] = add nsw i32 [[TMP10]], [[ADD_8]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 11), align 4 -; CHECK-NEXT: [[ADD_10:%.*]] = add nsw i32 [[TMP11]], [[ADD_9]] -; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 12), align 16 -; CHECK-NEXT: [[ADD_11:%.*]] = add nsw i32 [[TMP12]], [[ADD_10]] -; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 13), align 4 -; CHECK-NEXT: [[ADD_12:%.*]] = add nsw i32 [[TMP13]], [[ADD_11]] -; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 14), align 8 -; CHECK-NEXT: [[ADD_13:%.*]] = add nsw i32 [[TMP14]], [[ADD_12]] -; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 15), align 4 -; CHECK-NEXT: [[ADD_14:%.*]] = add nsw i32 [[TMP15]], [[ADD_13]] -; CHECK-NEXT: store i32 [[ADD_14]], i32* [[RES:%.*]], align 16 +; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([32 x i32]* @arr_i32 to <16 x i32>*), align 16 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> 
[[TMP0]]) +; CHECK-NEXT: store i32 [[TMP1]], i32* [[RES:%.*]], align 16 ; CHECK-NEXT: ret void ; -; STORE-LABEL: @i32_red_example16( -; STORE-NEXT: entry: -; STORE-NEXT: [[TMP0:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([32 x i32]* @arr_i32 to <16 x i32>*), align 16 -; STORE-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP0]]) -; STORE-NEXT: store i32 [[TMP1]], i32* [[RES:%.*]], align 16 -; STORE-NEXT: ret void -; entry: %0 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16 %1 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4 @@ -1114,79 +905,11 @@ define void @i32_red_example32(i32* %res) { ; CHECK-LABEL: @i32_red_example32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16 -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8 -; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP2]], [[ADD]] -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4 -; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP3]], [[ADD_1]] -; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 4), align 16 -; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP4]], [[ADD_2]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 5), align 4 -; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP5]], [[ADD_3]] -; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 6), align 8 -; CHECK-NEXT: 
[[ADD_5:%.*]] = add nsw i32 [[TMP6]], [[ADD_4]] -; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 7), align 4 -; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP7]], [[ADD_5]] -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 8), align 16 -; CHECK-NEXT: [[ADD_7:%.*]] = add nsw i32 [[TMP8]], [[ADD_6]] -; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 9), align 4 -; CHECK-NEXT: [[ADD_8:%.*]] = add nsw i32 [[TMP9]], [[ADD_7]] -; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 10), align 8 -; CHECK-NEXT: [[ADD_9:%.*]] = add nsw i32 [[TMP10]], [[ADD_8]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 11), align 4 -; CHECK-NEXT: [[ADD_10:%.*]] = add nsw i32 [[TMP11]], [[ADD_9]] -; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 12), align 16 -; CHECK-NEXT: [[ADD_11:%.*]] = add nsw i32 [[TMP12]], [[ADD_10]] -; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 13), align 4 -; CHECK-NEXT: [[ADD_12:%.*]] = add nsw i32 [[TMP13]], [[ADD_11]] -; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 14), align 8 -; CHECK-NEXT: [[ADD_13:%.*]] = add nsw i32 [[TMP14]], [[ADD_12]] -; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 15), align 4 -; CHECK-NEXT: [[ADD_14:%.*]] = add nsw i32 [[TMP15]], [[ADD_13]] -; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 16), align 16 -; CHECK-NEXT: [[ADD_15:%.*]] = add nsw i32 [[TMP16]], [[ADD_14]] -; CHECK-NEXT: 
[[TMP17:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 17), align 4 -; CHECK-NEXT: [[ADD_16:%.*]] = add nsw i32 [[TMP17]], [[ADD_15]] -; CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 18), align 8 -; CHECK-NEXT: [[ADD_17:%.*]] = add nsw i32 [[TMP18]], [[ADD_16]] -; CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 19), align 4 -; CHECK-NEXT: [[ADD_18:%.*]] = add nsw i32 [[TMP19]], [[ADD_17]] -; CHECK-NEXT: [[TMP20:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 20), align 16 -; CHECK-NEXT: [[ADD_19:%.*]] = add nsw i32 [[TMP20]], [[ADD_18]] -; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 21), align 4 -; CHECK-NEXT: [[ADD_20:%.*]] = add nsw i32 [[TMP21]], [[ADD_19]] -; CHECK-NEXT: [[TMP22:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 22), align 8 -; CHECK-NEXT: [[ADD_21:%.*]] = add nsw i32 [[TMP22]], [[ADD_20]] -; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 23), align 4 -; CHECK-NEXT: [[ADD_22:%.*]] = add nsw i32 [[TMP23]], [[ADD_21]] -; CHECK-NEXT: [[TMP24:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 24), align 16 -; CHECK-NEXT: [[ADD_23:%.*]] = add nsw i32 [[TMP24]], [[ADD_22]] -; CHECK-NEXT: [[TMP25:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 25), align 4 -; CHECK-NEXT: [[ADD_24:%.*]] = add nsw i32 [[TMP25]], [[ADD_23]] -; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 26), align 8 -; CHECK-NEXT: [[ADD_25:%.*]] = add nsw i32 [[TMP26]], [[ADD_24]] -; CHECK-NEXT: [[TMP27:%.*]] = load i32, i32* getelementptr inbounds 
([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 27), align 4 -; CHECK-NEXT: [[ADD_26:%.*]] = add nsw i32 [[TMP27]], [[ADD_25]] -; CHECK-NEXT: [[TMP28:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 28), align 16 -; CHECK-NEXT: [[ADD_27:%.*]] = add nsw i32 [[TMP28]], [[ADD_26]] -; CHECK-NEXT: [[TMP29:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 29), align 4 -; CHECK-NEXT: [[ADD_28:%.*]] = add nsw i32 [[TMP29]], [[ADD_27]] -; CHECK-NEXT: [[TMP30:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 30), align 8 -; CHECK-NEXT: [[ADD_29:%.*]] = add nsw i32 [[TMP30]], [[ADD_28]] -; CHECK-NEXT: [[TMP31:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 31), align 4 -; CHECK-NEXT: [[ADD_30:%.*]] = add nsw i32 [[TMP31]], [[ADD_29]] -; CHECK-NEXT: store i32 [[ADD_30]], i32* [[RES:%.*]], align 16 +; CHECK-NEXT: [[TMP0:%.*]] = load <32 x i32>, <32 x i32>* bitcast ([32 x i32]* @arr_i32 to <32 x i32>*), align 16 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> [[TMP0]]) +; CHECK-NEXT: store i32 [[TMP1]], i32* [[RES:%.*]], align 16 ; CHECK-NEXT: ret void ; -; STORE-LABEL: @i32_red_example32( -; STORE-NEXT: entry: -; STORE-NEXT: [[TMP0:%.*]] = load <32 x i32>, <32 x i32>* bitcast ([32 x i32]* @arr_i32 to <32 x i32>*), align 16 -; STORE-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> [[TMP0]]) -; STORE-NEXT: store i32 [[TMP1]], i32* [[RES:%.*]], align 16 -; STORE-NEXT: ret void -; entry: %0 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16 %1 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4 @@ -1258,12 +981,12 @@ declare i32 @foobar(i32) define void @i32_red_call(i32 %val) { -; ALL-LABEL: @i32_red_call( -; ALL-NEXT: entry: -; ALL-NEXT: [[TMP0:%.*]] = load <8 x 
i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16 -; ALL-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP0]]) -; ALL-NEXT: [[RES:%.*]] = call i32 @foobar(i32 [[TMP1]]) -; ALL-NEXT: ret void +; CHECK-LABEL: @i32_red_call( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP0]]) +; CHECK-NEXT: [[RES:%.*]] = call i32 @foobar(i32 [[TMP1]]) +; CHECK-NEXT: ret void ; entry: %0 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16 @@ -1286,18 +1009,18 @@ } define void @i32_red_invoke(i32 %val) personality i32 (...)* @__gxx_personality_v0 { -; ALL-LABEL: @i32_red_invoke( -; ALL-NEXT: entry: -; ALL-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16 -; ALL-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP0]]) -; ALL-NEXT: [[RES:%.*]] = invoke i32 @foobar(i32 [[TMP1]]) -; ALL-NEXT: to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]] -; ALL: exception: -; ALL-NEXT: [[CLEANUP:%.*]] = landingpad i8 -; ALL-NEXT: cleanup -; ALL-NEXT: br label [[NORMAL]] -; ALL: normal: -; ALL-NEXT: ret void +; CHECK-LABEL: @i32_red_invoke( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP0]]) +; CHECK-NEXT: [[RES:%.*]] = invoke i32 @foobar(i32 [[TMP1]]) +; CHECK-NEXT: to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]] +; CHECK: exception: +; CHECK-NEXT: [[CLEANUP:%.*]] = landingpad i8 +; CHECK-NEXT: cleanup +; CHECK-NEXT: br label [[NORMAL]] +; CHECK: normal: +; CHECK-NEXT: ret void ; entry: %0 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16 
@@ -1325,17 +1048,17 @@ ; Test case from PR47670. Reduction result is used as incoming value in phi. define i32 @reduction_result_used_in_phi(i32* nocapture readonly %data, i1 zeroext %b) { -; ALL-LABEL: @reduction_result_used_in_phi( -; ALL-NEXT: entry: -; ALL-NEXT: br i1 [[B:%.*]], label [[BB:%.*]], label [[EXIT:%.*]] -; ALL: bb: -; ALL-NEXT: [[TMP0:%.*]] = bitcast i32* [[DATA:%.*]] to <4 x i32>* -; ALL-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 -; ALL-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]]) -; ALL-NEXT: br label [[EXIT]] -; ALL: exit: -; ALL-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[BB]] ] -; ALL-NEXT: ret i32 [[SUM_1]] +; CHECK-LABEL: @reduction_result_used_in_phi( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[B:%.*]], label [[BB:%.*]], label [[EXIT:%.*]] +; CHECK: bb: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[DATA:%.*]] to <4 x i32>* +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]]) +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[BB]] ] +; CHECK-NEXT: ret i32 [[SUM_1]] ; entry: br i1 %b, label %bb, label %exit @@ -1359,17 +1082,17 @@ } define i32 @reduction_result_used_in_phi_loop(i32* nocapture readonly %data, i1 zeroext %b) { -; ALL-LABEL: @reduction_result_used_in_phi_loop( -; ALL-NEXT: entry: -; ALL-NEXT: br i1 [[B:%.*]], label [[BB:%.*]], label [[EXIT:%.*]] -; ALL: bb: -; ALL-NEXT: [[TMP0:%.*]] = bitcast i32* [[DATA:%.*]] to <4 x i32>* -; ALL-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 -; ALL-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]]) -; ALL-NEXT: br label [[EXIT]] -; ALL: exit: -; ALL-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[BB]] ] -; ALL-NEXT: ret i32 [[SUM_1]] +; CHECK-LABEL: 
@reduction_result_used_in_phi_loop( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[B:%.*]], label [[BB:%.*]], label [[EXIT:%.*]] +; CHECK: bb: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[DATA:%.*]] to <4 x i32>* +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]]) +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[BB]] ] +; CHECK-NEXT: ret i32 [[SUM_1]] ; entry: br i1 %b, label %bb, label %exit @@ -1395,15 +1118,15 @@ ; Make sure we do not crash or infinite loop on ill-formed IR. define void @unreachable_block() { -; ALL-LABEL: @unreachable_block( -; ALL-NEXT: bb.0: -; ALL-NEXT: br label [[BB_1:%.*]] -; ALL: dead: -; ALL-NEXT: [[T0:%.*]] = add i16 [[T0]], undef -; ALL-NEXT: br label [[BB_1]] -; ALL: bb.1: -; ALL-NEXT: [[T1:%.*]] = phi i16 [ undef, [[BB_0:%.*]] ], [ [[T0]], [[DEAD:%.*]] ] -; ALL-NEXT: ret void +; CHECK-LABEL: @unreachable_block( +; CHECK-NEXT: bb.0: +; CHECK-NEXT: br label [[BB_1:%.*]] +; CHECK: dead: +; CHECK-NEXT: [[T0:%.*]] = add i16 [[T0]], undef +; CHECK-NEXT: br label [[BB_1]] +; CHECK: bb.1: +; CHECK-NEXT: [[T1:%.*]] = phi i16 [ undef, [[BB_0:%.*]] ], [ [[T0]], [[DEAD:%.*]] ] +; CHECK-NEXT: ret void ; bb.0: br label %bb.1 @@ -1420,11 +1143,11 @@ ; The FMF on the reduction should match the incoming insts. 
define float @fadd_v4f32_fmf(float* %p) { -; ALL-LABEL: @fadd_v4f32_fmf( -; ALL-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>* -; ALL-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 -; ALL-NEXT: [[TMP3:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]]) -; ALL-NEXT: ret float [[TMP3]] +; CHECK-LABEL: @fadd_v4f32_fmf( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>* +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]]) +; CHECK-NEXT: ret float [[TMP3]] ; %p1 = getelementptr inbounds float, float* %p, i64 1 %p2 = getelementptr inbounds float, float* %p, i64 2 @@ -1444,11 +1167,11 @@ ; In this example, "contract nnan arcp" are dropped, but "ninf" transfers with the required flags. define float @fadd_v4f32_fmf_intersect(float* %p) { -; ALL-LABEL: @fadd_v4f32_fmf_intersect( -; ALL-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>* -; ALL-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 -; ALL-NEXT: [[TMP3:%.*]] = call reassoc ninf nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]]) -; ALL-NEXT: ret float [[TMP3]] +; CHECK-LABEL: @fadd_v4f32_fmf_intersect( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>* +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = call reassoc ninf nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]]) +; CHECK-NEXT: ret float [[TMP3]] ; %p1 = getelementptr inbounds float, float* %p, i64 1 %p2 = getelementptr inbounds float, float* %p, i64 2 @@ -1467,23 +1190,11 @@ define void @nsw_propagation_v4i32(i32* %res, i32 %start) { ; CHECK-LABEL: @nsw_propagation_v4i32( -; CHECK-NEXT: [[T0:%.*]] = load i32, i32* getelementptr 
inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16 -; CHECK-NEXT: [[T1:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4 -; CHECK-NEXT: [[T2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8 -; CHECK-NEXT: [[T3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4 -; CHECK-NEXT: [[S:%.*]] = add nsw i32 [[START:%.*]], [[T0]] -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[T1]], [[S]] -; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[T2]], [[ADD]] -; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[T3]], [[ADD_1]] -; CHECK-NEXT: store i32 [[ADD_2]], i32* [[RES:%.*]], align 16 +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([32 x i32]* @arr_i32 to <4 x i32>*), align 16 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP2]], [[START:%.*]] +; CHECK-NEXT: store i32 [[OP_RDX]], i32* [[RES:%.*]], align 16 ; CHECK-NEXT: ret void -; -; STORE-LABEL: @nsw_propagation_v4i32( -; STORE-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([32 x i32]* @arr_i32 to <4 x i32>*), align 16 -; STORE-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]]) -; STORE-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP2]], [[START:%.*]] -; STORE-NEXT: store i32 [[OP_RDX]], i32* [[RES:%.*]], align 16 -; STORE-NEXT: ret void ; %t0 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16 %t1 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insertelement-postpone.ll b/llvm/test/Transforms/SLPVectorizer/X86/insertelement-postpone.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/insertelement-postpone.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been 
autogenerated by utils/update_test_checks.py +; RUN: opt -S -slp-vectorizer -mtriple x86_64-unknown-linux-gnu < %s | FileCheck %s + +define <4 x double> @test(double* %p2, double %i1754, double %i1781, double %i1778) { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[I1771:%.*]] = getelementptr inbounds double, double* [[P2:%.*]], i64 54 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[I1754:%.*]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[I1778:%.*]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP0]], double [[I1754]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[I1792:%.*]] = fmul fast double [[I1754]], [[I1781:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[I1771]] to <2 x double>* +; CHECK-NEXT: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP0]], double [[I1781]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = fmul fast <2 x double> [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x double> [[TMP10]], double [[I1792]], i32 2 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x double> [[TMP11]], double [[TMP8]], i32 3 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x double> , double [[TMP9]], i32 3 +; CHECK-NEXT: [[TMP14:%.*]] = fadd fast <4 x double> [[TMP12]], [[TMP13]] +; CHECK-NEXT: ret <4 x double> [[TMP14]] +; +entry: + %i1771 = getelementptr inbounds double, double* %p2, i64 54 + %i1772 = load double, double* %i1771, align 8 + %i1773 = fmul fast double %i1772, %i1754 + %i1782 = fmul fast double %i1754, %i1754 + %i1783 = fadd fast double %i1782, 1.000000e+00 + %i1787 = fmul 
fast double %i1778, %i1754 + %i1788 = fadd fast double %i1787, 1.000000e+00 + %i1792 = fmul fast double %i1754, %i1781 + %i1793 = fadd fast double %i1792, 1.000000e+00 + %i1795 = getelementptr inbounds double, double* %p2, i64 55 + %i1796 = load double, double* %i1795, align 8 + %i1797 = fmul fast double %i1796, %i1781 + %i1798 = fadd fast double %i1773, %i1797 + %i1976 = insertelement <4 x double> zeroinitializer, double %i1783, i64 0 + %i1982 = insertelement <4 x double> %i1976, double %i1788, i64 1 + %i1988 = insertelement <4 x double> %i1982, double %i1793, i64 2 + %i1994 = insertelement <4 x double> %i1988, double %i1798, i64 3 + ret <4 x double> %i1994 +} diff --git a/llvm/test/Transforms/SimplifyCFG/nonintegral.ll b/llvm/test/Transforms/SimplifyCFG/nonintegral.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/nonintegral.ll @@ -0,0 +1,28 @@ +; RUN: opt -passes=simplifycfg -S < %s | FileCheck %s + +target datalayout = "ni:1" + +define void @test_01(i64 addrspace(1)* align 8 %ptr) { +; CHECK-LABEL: @test_01( +; CHECK-NOT: ptrtoint +; CHECK-NEXT: icmp eq i64 addrspace(1)* %ptr, null +; CHECK-NOT: ptrtoint + %cond1 = icmp eq i64 addrspace(1)* %ptr, null + %cond2 = icmp eq i64 addrspace(1)* %ptr, null + br i1 %cond1, label %true1, label %false1 + +true1: + br i1 %cond2, label %true2, label %false2 + +false1: + store i64 1, i64 addrspace(1)* %ptr, align 8 + br label %true1 + +true2: + store i64 2, i64 addrspace(1)* %ptr, align 8 + ret void + +false2: + store i64 3, i64 addrspace(1)* %ptr, align 8 + ret void +} diff --git a/llvm/test/Transforms/TypePromotion/AArch64/vla-zext.ll b/llvm/test/Transforms/TypePromotion/AArch64/vla-zext.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/TypePromotion/AArch64/vla-zext.ll @@ -0,0 +1,56 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -mtriple=aarch64 -type-promotion -verify -S %s -o - | FileCheck %s + +define dso_local void @foo(ptr nocapture 
noundef readonly %a, ptr nocapture noundef readonly %b, ptr nocapture noundef writeonly %c, i64 noundef %n) local_unnamed_addr { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 0 +; CHECK-NEXT: [[TMP1:%.*]] = load , ptr [[TMP0]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 3 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[B_VECTOR:%.*]] = phi [ [[TMP1]], [[ENTRY]] ], [ [[B_VECTOR_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP5:%.*]] = load , ptr [[TMP4]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = zext [[B_VECTOR]] to +; CHECK-NEXT: [[TMP7:%.*]] = add [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[INDEX]] +; CHECK-NEXT: store [[TMP7]], ptr [[TMP8]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[B_VECTOR_NEXT]] = load , ptr [[TMP9]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N:%.*]] +; CHECK-NEXT: br i1 [[TMP10]], label [[END:%.*]], label [[VECTOR_BODY]] +; CHECK: end: +; CHECK-NEXT: ret void +; +entry: + %0 = getelementptr inbounds i8, ptr %b, i64 0 + %1 = load , ptr %0, align 1 + %2 = tail call i64 @llvm.vscale.i64() + %3 = shl nuw nsw i64 %2, 3 + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %b_vector = phi [ %1, %entry ], [ %b_vector.next, %vector.body ] + %4 = getelementptr inbounds i32, ptr %a, i64 %index + %5 = load , ptr %4, align 4 + %6 = zext %b_vector to + %7 = add %5, %6 + %8 = getelementptr 
inbounds i32, ptr %c, i64 %index + store %7, ptr %8, align 4 + %9 = getelementptr inbounds i8, ptr %b, i64 %index + %b_vector.next = load , ptr %9, align 1 + %index.next = add nuw i64 %index, %3 + %10 = icmp eq i64 %index.next, %n + br i1 %10, label %end, label %vector.body + +end: ; preds = %vector.body + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind readnone willreturn +declare i64 @llvm.vscale.i64() diff --git a/llvm/test/Transforms/Util/flattencfg.ll b/llvm/test/Transforms/Util/flattencfg.ll --- a/llvm/test/Transforms/Util/flattencfg.ll +++ b/llvm/test/Transforms/Util/flattencfg.ll @@ -216,3 +216,20 @@ exit: ret void } + +; This would crash. + +declare i1 @llvm.smax.i1(i1, i1) #0 + +; CHECK-LABEL: @PR56875 +define void @PR56875(i1 %val_i1_5) { +entry_1: + ret void + +bb_2: ; preds = %bb_4 + br label %bb_4 + +bb_4: ; preds = %bb_4, %bb_2 + %val_i1_46 = call i1 @llvm.smax.i1(i1 %val_i1_5, i1 %val_i1_5) + br i1 %val_i1_46, label %bb_4, label %bb_2 +} diff --git a/llvm/test/tools/llvm-objdump/MachO/chained-fixups.yaml b/llvm/test/tools/llvm-objdump/MachO/chained-fixups.yaml --- a/llvm/test/tools/llvm-objdump/MachO/chained-fixups.yaml +++ b/llvm/test/tools/llvm-objdump/MachO/chained-fixups.yaml @@ -1,102 +1,107 @@ # RUN: yaml2obj %s -o %t # RUN: llvm-objdump -p %t | FileCheck %s # RUN: llvm-otool -l %t | FileCheck %s -# + # CHECK: LC_DYLD_CHAINED_FIXUPS # CHECK: LC_DYLD_EXPORTS_TRIE +# RUN: llvm-objdump --macho --chained-fixups %t | \ +# RUN: FileCheck --check-prefix=DETAILS -DNAME=%t %s +# RUN: llvm-otool -chained_fixups %t | \ +# RUN: FileCheck --check-prefix=DETAILS -DNAME=%t %s + +# DETAILS: [[NAME]]: +# DETAILS-NEXT: chained fixups header (LC_DYLD_CHAINED_FIXUPS) +# DETAILS-NEXT: fixups_version = 0 +# DETAILS-NEXT: starts_offset = 32 +# DETAILS-NEXT: imports_offset = 44 +# DETAILS-NEXT: symbols_offset = 44 +# DETAILS-NEXT: imports_count = 0 +# DETAILS-NEXT: imports_format = 1 (DYLD_CHAINED_IMPORT) +# DETAILS-NEXT: symbols_format = 0 + +## 
This yaml is from a dylib produced by ld64 +## echo ".global _foo\n_foo" > dylib.s +## clang -target=x86_64-apple-macos12 -dynamiclib -isysroot Inputs/MacOSX.sdk dylib.s -o libdylib.dylib +## obj2yaml --raw-segment=data libdylib.dylib --- !mach-o +IsLittleEndian: true FileHeader: magic: 0xFEEDFACF - cputype: 0x100000C - cpusubtype: 0x0 - filetype: 0x2 - ncmds: 16 - sizeofcmds: 744 - flags: 0x200085 + cputype: 0x1000007 + cpusubtype: 0x3 + filetype: 0x6 + ncmds: 13 + sizeofcmds: 568 + flags: 0x100085 reserved: 0x0 LoadCommands: - cmd: LC_SEGMENT_64 - cmdsize: 72 - segname: __PAGEZERO - vmaddr: 0 - vmsize: 4294967296 - fileoff: 0 - filesize: 0 - maxprot: 0 - initprot: 0 - nsects: 0 - flags: 0 - - cmd: LC_SEGMENT_64 - cmdsize: 232 + cmdsize: 152 segname: __TEXT - vmaddr: 4294967296 + vmaddr: 0 vmsize: 16384 fileoff: 0 filesize: 16384 maxprot: 5 initprot: 5 - nsects: 2 + nsects: 1 flags: 0 Sections: - sectname: __text segname: __TEXT - addr: 0x100003F98 - size: 24 - offset: 0x3F98 - align: 2 + addr: 0x4000 + size: 0 + offset: 0x4000 + align: 0 reloff: 0x0 nreloc: 0 flags: 0x80000400 reserved1: 0x0 reserved2: 0x0 reserved3: 0x0 - content: C0035FD6FF4300D100008052FF0F00B9FF430091C0035FD6 - - sectname: __unwind_info - segname: __TEXT - addr: 0x100003FB0 - size: 80 - offset: 0x3FB0 - align: 2 - reloff: 0x0 - nreloc: 0 - flags: 0x0 - reserved1: 0x0 - reserved2: 0x0 - reserved3: 0x0 - content: 010000001C000000000000001C000000000000001C00000002000000983F00003400000034000000B13F00000000000034000000030000000C0002001400020000000001040000000010000200000002 + content: '' - cmd: LC_SEGMENT_64 cmdsize: 72 segname: __LINKEDIT - vmaddr: 4294983680 + vmaddr: 16384 vmsize: 16384 fileoff: 16384 - filesize: 753 + filesize: 96 maxprot: 1 initprot: 1 nsects: 0 flags: 0 + - cmd: LC_ID_DYLIB + cmdsize: 48 + dylib: + name: 24 + timestamp: 1 + current_version: 0 + compatibility_version: 0 + Content: libdylib.dylib + ZeroPadBytes: 3 - cmd: LC_DYLD_CHAINED_FIXUPS cmdsize: 16 dataoff: 16384 - 
datasize: 56 + datasize: 48 - cmd: LC_DYLD_EXPORTS_TRIE cmdsize: 16 - dataoff: 16440 - datasize: 56 + dataoff: 16432 + datasize: 16 - cmd: LC_SYMTAB cmdsize: 24 - symoff: 16504 - nsyms: 15 - stroff: 16744 - strsize: 120 + symoff: 16456 + nsyms: 1 + stroff: 16472 + strsize: 8 - cmd: LC_DYSYMTAB cmdsize: 80 ilocalsym: 0 - nlocalsym: 12 - iextdefsym: 12 - nextdefsym: 3 - iundefsym: 15 + nlocalsym: 0 + iextdefsym: 0 + nextdefsym: 1 + iundefsym: 1 nundefsym: 0 tocoff: 0 ntoc: 0 @@ -110,136 +115,37 @@ nextrel: 0 locreloff: 0 nlocrel: 0 - - cmd: LC_LOAD_DYLINKER - cmdsize: 32 - name: 12 - Content: '/usr/lib/dyld' - ZeroPadBytes: 7 - cmd: LC_UUID cmdsize: 24 - uuid: F445529E-643C-3A38-8F59-AB64566BCAFF + uuid: 52409B91-DF59-346A-A63F-D4E6FFDC3E04 - cmd: LC_BUILD_VERSION cmdsize: 32 platform: 1 minos: 786432 - sdk: 786432 + sdk: 851968 ntools: 1 Tools: - tool: 3 - version: 46596096 + version: 53674242 - cmd: LC_SOURCE_VERSION cmdsize: 16 version: 0 - - cmd: LC_MAIN - cmdsize: 24 - entryoff: 16284 - stacksize: 0 - cmd: LC_LOAD_DYLIB cmdsize: 56 dylib: name: 24 timestamp: 2 - current_version: 85917696 + current_version: 65793 compatibility_version: 65536 - Content: '/usr/lib/libSystem.B.dylib' - ZeroPadBytes: 6 + Content: '/usr/lib/libSystem.dylib' + ZeroPadBytes: 8 - cmd: LC_FUNCTION_STARTS cmdsize: 16 - dataoff: 16496 + dataoff: 16448 datasize: 8 - cmd: LC_DATA_IN_CODE cmdsize: 16 - dataoff: 16504 + dataoff: 16456 datasize: 0 - - cmd: LC_CODE_SIGNATURE - cmdsize: 16 - dataoff: 16864 - datasize: 273 -LinkEditData: - NameList: - - n_strx: 33 - n_type: 0x64 - n_sect: 0 - n_desc: 0 - n_value: 0 - - n_strx: 39 - n_type: 0x64 - n_sect: 0 - n_desc: 0 - n_value: 0 - - n_strx: 46 - n_type: 0x66 - n_sect: 0 - n_desc: 1 - n_value: 1636754403 - - n_strx: 1 - n_type: 0x2E - n_sect: 1 - n_desc: 0 - n_value: 4294983576 - - n_strx: 109 - n_type: 0x24 - n_sect: 1 - n_desc: 0 - n_value: 4294983576 - - n_strx: 1 - n_type: 0x24 - n_sect: 0 - n_desc: 0 - n_value: 4 - - n_strx: 1 - n_type: 0x4E 
- n_sect: 1 - n_desc: 0 - n_value: 4 - - n_strx: 1 - n_type: 0x2E - n_sect: 1 - n_desc: 0 - n_value: 4294983580 - - n_strx: 114 - n_type: 0x24 - n_sect: 1 - n_desc: 0 - n_value: 4294983580 - - n_strx: 1 - n_type: 0x24 - n_sect: 0 - n_desc: 0 - n_value: 20 - - n_strx: 1 - n_type: 0x4E - n_sect: 1 - n_desc: 0 - n_value: 20 - - n_strx: 1 - n_type: 0x64 - n_sect: 1 - n_desc: 0 - n_value: 0 - - n_strx: 2 - n_type: 0xF - n_sect: 1 - n_desc: 16 - n_value: 4294967296 - - n_strx: 22 - n_type: 0xF - n_sect: 1 - n_desc: 0 - n_value: 4294983576 - - n_strx: 27 - n_type: 0xF - n_sect: 1 - n_desc: 0 - n_value: 4294983580 - StringTable: - - ' ' - - __mh_execute_header - - _foo - - _main - - '/tmp/' - - main.c - - '/var/folders/gj/wf3swl0x215b2sq1qy84kzkm0000gn/T/main-e32fe7.o' - - _foo - - _main +__LINKEDIT: 00000000200000002C0000002C000000000000000100000000000000000000000200000000000000000000000000000000015F666F6F000804008080010000000000000000000000020000000F010000004000000000000020005F666F6F0000 ... diff --git a/llvm/test/tools/llvm-reduce/mir/preserve-block-info.mir b/llvm/test/tools/llvm-reduce/mir/preserve-block-info.mir --- a/llvm/test/tools/llvm-reduce/mir/preserve-block-info.mir +++ b/llvm/test/tools/llvm-reduce/mir/preserve-block-info.mir @@ -8,7 +8,7 @@ # RESULT: bb.0.entry: # RESULT: %{{[0-9]+}}:vgpr_32 = V_MOV_B32_e32 0, implicit $exec -# RESULT: bb.1 (address-taken, align 8): +# RESULT: bb.1 (ir-block-address-taken %ir-block.exitblock, align 8): # RESULT: bb.2 (landing-pad, align 16): # RESULT: bb.3 (inlineasm-br-indirect-target): # RESULT: bb.4 (ehfunclet-entry): @@ -19,6 +19,7 @@ # RESULT-NEXT: successors: %bb.9(0x66666666), %bb.10(0x1999999a) # RESULT: bb.9: # RESULT: bb.10.exitblock: +# RESULT: bb.11 (machine-block-address-taken): --- | define void @func(i32 %size) { @@ -48,7 +49,7 @@ S_NOP 0 %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - bb.1 (address-taken, align 8): + bb.1 (ir-block-address-taken %ir-block.exitblock, align 8): bb.2 (landing-pad, align 16): @@ 
-69,4 +70,6 @@ bb.10.exitblock: S_ENDPGM 0, implicit %0 + + bb.11 (machine-block-address-taken): ... diff --git a/llvm/test/tools/llvm-reduce/simplify-cfg.ll b/llvm/test/tools/llvm-reduce/simplify-cfg.ll new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-reduce/simplify-cfg.ll @@ -0,0 +1,33 @@ +; RUN: llvm-reduce --delta-passes=simplify-cfg --test %python --test-arg %p/Inputs/remove-bbs.py -abort-on-invalid-reduction %s -o %t + +; RUN: FileCheck --check-prefix=CHECK-FINAL %s --input-file=%t +; CHECK-FINAL: @f1 +; CHECK-FINAL-NOT: x6: +; CHECK-FINAL-NOT: x10: + +define void @f1(ptr %interesting3, i32 %interesting2) { + %x3 = alloca ptr, i32 0, align 8 + store ptr %interesting3, ptr %interesting3, align 8 + switch i32 %interesting2, label %interesting1 [ + i32 0, label %x6 + i32 1, label %x11 + ] + +x4: + %x5 = call ptr @f2() + br label %x10 + +x10: + br label %interesting1 + +x6: + br label %x11 + +x11: + br label %interesting1 + +interesting1: + ret void +} + +declare ptr @f2() diff --git a/llvm/tools/gold/gold-plugin.cpp b/llvm/tools/gold/gold-plugin.cpp --- a/llvm/tools/gold/gold-plugin.cpp +++ b/llvm/tools/gold/gold-plugin.cpp @@ -722,8 +722,8 @@ // Returns true if S is valid as a C language identifier. 
static bool isValidCIdentifier(StringRef S) { return !S.empty() && (isAlpha(S[0]) || S[0] == '_') && - std::all_of(S.begin() + 1, S.end(), - [](char C) { return C == '_' || isAlnum(C); }); + llvm::all_of(llvm::drop_begin(S), + [](char C) { return C == '_' || isAlnum(C); }); } static bool isUndefined(ld_plugin_symbol &Sym) { diff --git a/llvm/tools/llvm-objdump/MachODump.h b/llvm/tools/llvm-objdump/MachODump.h --- a/llvm/tools/llvm-objdump/MachODump.h +++ b/llvm/tools/llvm-objdump/MachODump.h @@ -36,6 +36,7 @@ extern bool Bind; extern bool DataInCode; extern std::string DisSymName; +extern bool ChainedFixups; extern bool DyldInfo; extern bool DylibId; extern bool DylibsUsed; diff --git a/llvm/tools/llvm-objdump/MachODump.cpp b/llvm/tools/llvm-objdump/MachODump.cpp --- a/llvm/tools/llvm-objdump/MachODump.cpp +++ b/llvm/tools/llvm-objdump/MachODump.cpp @@ -81,6 +81,7 @@ bool objdump::FunctionStarts; bool objdump::LinkOptHints; bool objdump::InfoPlist; +bool objdump::ChainedFixups; bool objdump::DyldInfo; bool objdump::DylibsUsed; bool objdump::DylibId; @@ -112,6 +113,7 @@ FunctionStarts = InputArgs.hasArg(OBJDUMP_function_starts); LinkOptHints = InputArgs.hasArg(OBJDUMP_link_opt_hints); InfoPlist = InputArgs.hasArg(OBJDUMP_info_plist); + ChainedFixups = InputArgs.hasArg(OBJDUMP_chained_fixups); DyldInfo = InputArgs.hasArg(OBJDUMP_dyld_info); DylibsUsed = InputArgs.hasArg(OBJDUMP_dylibs_used); DylibId = InputArgs.hasArg(OBJDUMP_dylib_id); @@ -1193,6 +1195,48 @@ reportError(std::move(Err), Obj->getFileName()); } +static void +PrintChainedFixupsHeader(const MachO::dyld_chained_fixups_header &H) { + outs() << "chained fixups header (LC_DYLD_CHAINED_FIXUPS)\n"; + outs() << " fixups_version = " << H.fixups_version << '\n'; + outs() << " starts_offset = " << H.starts_offset << '\n'; + outs() << " imports_offset = " << H.imports_offset << '\n'; + outs() << " symbols_offset = " << H.symbols_offset << '\n'; + outs() << " imports_count = " << H.imports_count << '\n'; + + outs() 
<< " imports_format = " << H.imports_format; + switch (H.imports_format) { + case llvm::MachO::DYLD_CHAINED_IMPORT: + outs() << " (DYLD_CHAINED_IMPORT)"; + break; + case llvm::MachO::DYLD_CHAINED_IMPORT_ADDEND: + outs() << " (DYLD_CHAINED_IMPORT_ADDEND)"; + break; + case llvm::MachO::DYLD_CHAINED_IMPORT_ADDEND64: + outs() << " (DYLD_CHAINED_IMPORT_ADDEND64)"; + break; + } + outs() << '\n'; + + outs() << " symbols_format = " << H.symbols_format; + if (H.symbols_format == llvm::MachO::DYLD_CHAINED_SYMBOL_ZLIB) + outs() << " (zlib compressed)"; + outs() << '\n'; +} + +static void PrintChainedFixups(MachOObjectFile *O) { + // MachOObjectFile::getChainedFixupsHeader() reads LC_DYLD_CHAINED_FIXUPS. + // FIXME: Support chained fixups in __TEXT,__chain_starts section too. + auto ChainedFixupHeader = + unwrapOrError(O->getChainedFixupsHeader(), O->getFileName()); + if (!ChainedFixupHeader) + return; + + PrintChainedFixupsHeader(*ChainedFixupHeader); + + // FIXME: Print more things. +} + static void PrintDyldInfo(MachOObjectFile *O) { outs() << "dyld information:" << '\n'; printMachOChainedFixups(O); @@ -1916,8 +1960,9 @@ // UniversalHeaders or ArchiveHeaders. 
if (Disassemble || Relocations || PrivateHeaders || ExportsTrie || Rebase || Bind || SymbolTable || LazyBind || WeakBind || IndirectSymbols || - DataInCode || FunctionStarts || LinkOptHints || DyldInfo || DylibsUsed || - DylibId || Rpaths || ObjcMetaData || (!FilterSections.empty())) { + DataInCode || FunctionStarts || LinkOptHints || ChainedFixups || + DyldInfo || DylibsUsed || DylibId || Rpaths || ObjcMetaData || + (!FilterSections.empty())) { if (LeadingHeaders) { outs() << Name; if (!ArchiveMemberName.empty()) @@ -1986,6 +2031,8 @@ DumpSectionContents(FileName, MachOOF, Verbose); if (InfoPlist) DumpInfoPlistSectionContents(FileName, MachOOF); + if (ChainedFixups) + PrintChainedFixups(MachOOF); if (DyldInfo) PrintDyldInfo(MachOOF); if (DylibsUsed) diff --git a/llvm/tools/llvm-objdump/ObjdumpOpts.td b/llvm/tools/llvm-objdump/ObjdumpOpts.td --- a/llvm/tools/llvm-objdump/ObjdumpOpts.td +++ b/llvm/tools/llvm-objdump/ObjdumpOpts.td @@ -299,11 +299,15 @@ "Mach-O objects (requires --macho)">, Group; +def chained_fixups : Flag<["--"], "chained-fixups">, + HelpText<"Print chained fixup information (requires --macho)">, + Group; + def dyld_info : Flag<["--"], "dyld_info">, - HelpText<"Print bind and rebase information used by dyld to resolve " - "external references in a final linked binary " - "(requires --macho)">, - Group; + HelpText<"Print bind and rebase information used by dyld to resolve " + "external references in a final linked binary " + "(requires --macho)">, + Group; def dylibs_used : Flag<["--"], "dylibs-used">, HelpText<"Print the shared libraries used for linked " diff --git a/llvm/tools/llvm-objdump/OtoolOpts.td b/llvm/tools/llvm-objdump/OtoolOpts.td --- a/llvm/tools/llvm-objdump/OtoolOpts.td +++ b/llvm/tools/llvm-objdump/OtoolOpts.td @@ -37,13 +37,15 @@ def x : Flag<["-"], "x">, HelpText<"print all text sections">; def X : Flag<["-"], "X">, HelpText<"omit leading addresses or headers">; +def chained_fixups : Flag<["-"], "chained_fixups">, + 
HelpText<"print chained fixup information">; + // Not (yet?) implemented: // def a : Flag<["-"], "a">, HelpText<"print archive header">; // -c print argument strings of a core file // -m don't use archive(member) syntax // -dyld_info // -dyld_opcodes -// -chained_fixups // -addr_slide=arg // -function_offsets diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -2787,6 +2787,8 @@ FilterSections.push_back(",__text"); LeadingAddr = LeadingHeaders = !InputArgs.hasArg(OTOOL_X); + ChainedFixups = InputArgs.hasArg(OTOOL_chained_fixups); + InputFilenames = InputArgs.getAllArgValues(OTOOL_INPUT); if (InputFilenames.empty()) reportCmdLineError("no input file"); @@ -2990,11 +2992,12 @@ !DynamicRelocations && !FileHeaders && !PrivateHeaders && !RawClangAST && !Relocations && !SectionHeaders && !SectionContents && !SymbolTable && !DynamicSymbolTable && !UnwindInfo && !FaultMapSection && !Offloading && - !(MachOOpt && (Bind || DataInCode || DyldInfo || DylibId || DylibsUsed || - ExportsTrie || FirstPrivateHeader || FunctionStarts || - IndirectSymbols || InfoPlist || LazyBind || LinkOptHints || - ObjcMetaData || Rebase || Rpaths || UniversalHeaders || - WeakBind || !FilterSections.empty()))) { + !(MachOOpt && + (Bind || DataInCode || ChainedFixups || DyldInfo || DylibId || + DylibsUsed || ExportsTrie || FirstPrivateHeader || FunctionStarts || + IndirectSymbols || InfoPlist || LazyBind || LinkOptHints || + ObjcMetaData || Rebase || Rpaths || UniversalHeaders || WeakBind || + !FilterSections.empty()))) { T->printHelp(ToolName); return 2; } diff --git a/llvm/tools/llvm-reduce/CMakeLists.txt b/llvm/tools/llvm-reduce/CMakeLists.txt --- a/llvm/tools/llvm-reduce/CMakeLists.txt +++ b/llvm/tools/llvm-reduce/CMakeLists.txt @@ -49,6 +49,7 @@ deltas/ReduceRegisterMasks.cpp deltas/ReduceRegisterDefs.cpp deltas/ReduceRegisterUses.cpp + 
deltas/ReduceUsingSimplifyCFG.cpp deltas/RunIRPasses.cpp deltas/SimplifyInstructions.cpp llvm-reduce.cpp diff --git a/llvm/tools/llvm-reduce/DeltaManager.cpp b/llvm/tools/llvm-reduce/DeltaManager.cpp --- a/llvm/tools/llvm-reduce/DeltaManager.cpp +++ b/llvm/tools/llvm-reduce/DeltaManager.cpp @@ -39,6 +39,7 @@ #include "deltas/ReduceRegisterMasks.h" #include "deltas/ReduceRegisterUses.h" #include "deltas/ReduceSpecialGlobals.h" +#include "deltas/ReduceUsingSimplifyCFG.h" #include "deltas/ReduceVirtualRegisters.h" #include "deltas/RunIRPasses.h" #include "deltas/SimplifyInstructions.h" @@ -75,6 +76,7 @@ DELTA_PASS("operands-to-args", reduceOperandsToArgsDeltaPass) \ DELTA_PASS("operands-skip", reduceOperandsSkipDeltaPass) \ DELTA_PASS("operand-bundles", reduceOperandBundesDeltaPass) \ + DELTA_PASS("simplify-cfg", reduceUsingSimplifyCFGDeltaPass) \ DELTA_PASS("attributes", reduceAttributesDeltaPass) \ DELTA_PASS("module-data", reduceModuleDataDeltaPass) \ } while (false) diff --git a/llvm/tools/llvm-reduce/ReducerWorkItem.cpp b/llvm/tools/llvm-reduce/ReducerWorkItem.cpp --- a/llvm/tools/llvm-reduce/ReducerWorkItem.cpp +++ b/llvm/tools/llvm-reduce/ReducerWorkItem.cpp @@ -206,8 +206,10 @@ DstMF->CreateMachineBasicBlock(SrcMBB.getBasicBlock()); Src2DstMBB[&SrcMBB] = DstMBB; - if (SrcMBB.hasAddressTaken()) - DstMBB->setHasAddressTaken(); + if (SrcMBB.isIRBlockAddressTaken()) + DstMBB->setAddressTakenIRBlock(SrcMBB.getAddressTakenIRBlock()); + if (SrcMBB.isMachineBlockAddressTaken()) + DstMBB->setMachineBlockAddressTaken(); // FIXME: This is not serialized if (SrcMBB.hasLabelMustBeEmitted()) diff --git a/llvm/tools/llvm-reduce/deltas/ReduceUsingSimplifyCFG.h b/llvm/tools/llvm-reduce/deltas/ReduceUsingSimplifyCFG.h new file mode 100644 --- /dev/null +++ b/llvm/tools/llvm-reduce/deltas/ReduceUsingSimplifyCFG.h @@ -0,0 +1,23 @@ +//===- ReduceUsingSimplifyCFG.h - Specialized Delta Pass ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with 
LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a function which calls the Generic Delta pass in order +// to call SimplifyCFG on individual basic blocks. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_REDUCE_DELTAS_SIMPLIFYCFG_H +#define LLVM_TOOLS_LLVM_REDUCE_DELTAS_SIMPLIFYCFG_H + +#include "Delta.h" + +namespace llvm { +void reduceUsingSimplifyCFGDeltaPass(TestRunner &Test); +} // namespace llvm + +#endif diff --git a/llvm/tools/llvm-reduce/deltas/ReduceUsingSimplifyCFG.cpp b/llvm/tools/llvm-reduce/deltas/ReduceUsingSimplifyCFG.cpp new file mode 100644 --- /dev/null +++ b/llvm/tools/llvm-reduce/deltas/ReduceUsingSimplifyCFG.cpp @@ -0,0 +1,34 @@ +//===- ReduceUsingSimplifyCFG.h - Specialized Delta Pass ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a function which calls the Generic Delta pass in order +// to call SimplifyCFG on individual basic blocks. 
+// +//===----------------------------------------------------------------------===// + +#include "ReduceUsingSimplifyCFG.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Transforms/Utils/Local.h" + +using namespace llvm; + +static void reduceUsingSimplifyCFG(Oracle &O, Module &Program) { + SmallVector ToSimplify; + for (auto &F : Program) + for (auto &BB : F) + if (!O.shouldKeep()) + ToSimplify.push_back(&BB); + TargetTransformInfo TTI(Program.getDataLayout()); + for (auto *BB : ToSimplify) + simplifyCFG(BB, TTI); +} + +void llvm::reduceUsingSimplifyCFGDeltaPass(TestRunner &Test) { + outs() << "*** Reducing using SimplifyCFG...\n"; + runDeltaPass(Test, reduceUsingSimplifyCFG); +} diff --git a/llvm/tools/llvm-reduce/llvm-reduce.cpp b/llvm/tools/llvm-reduce/llvm-reduce.cpp --- a/llvm/tools/llvm-reduce/llvm-reduce.cpp +++ b/llvm/tools/llvm-reduce/llvm-reduce.cpp @@ -17,27 +17,19 @@ #include "DeltaManager.h" #include "ReducerWorkItem.h" #include "TestRunner.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ModuleSummaryAnalysis.h" -#include "llvm/ADT/SmallString.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/CodeGen/CommandFlags.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Verifier.h" #include "llvm/IRReader/IRReader.h" -#include "llvm/MC/TargetRegistry.h" +#include "llvm/Passes/PassBuilder.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Host.h" #include "llvm/Support/InitLLVM.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Support/TargetSelect.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h" #include #include @@ -117,9 +109,19 @@ void 
writeBitcode(ReducerWorkItem &M, llvm::raw_ostream &OutStream) { if (M.LTOInfo && M.LTOInfo->IsThinLTO && M.LTOInfo->EnableSplitLTOUnit) { - legacy::PassManager PM; - PM.add(llvm::createWriteThinLTOBitcodePass(OutStream)); - PM.run(*(M.M)); + PassBuilder PB; + LoopAnalysisManager LAM; + FunctionAnalysisManager FAM; + CGSCCAnalysisManager CGAM; + ModuleAnalysisManager MAM; + PB.registerModuleAnalyses(MAM); + PB.registerCGSCCAnalyses(CGAM); + PB.registerFunctionAnalyses(FAM); + PB.registerLoopAnalyses(LAM); + PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); + ModulePassManager MPM; + MPM.addPass(ThinLTOBitcodeWriterPass(OutStream, nullptr)); + MPM.run(*M.M, MAM); } else { std::unique_ptr Index; if (M.LTOInfo && M.LTOInfo->HasSummary) { diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -970,8 +970,8 @@ report_fatal_error("Text output is incompatible with -module-hash"); Passes.add(createPrintModulePass(*OS, "", PreserveAssemblyUseListOrder)); } else if (OutputThinLTOBC) - Passes.add(createWriteThinLTOBitcodePass( - *OS, ThinLinkOut ? 
&ThinLinkOut->os() : nullptr)); + report_fatal_error( + "Use the new pass manager for printing ThinLTO bitcode"); else Passes.add(createBitcodeWriterPass(*OS, PreserveBitcodeUseListOrder, EmitSummaryIndex, EmitModuleHash)); diff --git a/llvm/unittests/ADT/SmallSetTest.cpp b/llvm/unittests/ADT/SmallSetTest.cpp --- a/llvm/unittests/ADT/SmallSetTest.cpp +++ b/llvm/unittests/ADT/SmallSetTest.cpp @@ -21,11 +21,17 @@ SmallSet s1; - for (int i = 0; i < 4; i++) - s1.insert(i); + for (int i = 0; i < 4; i++) { + auto InsertResult = s1.insert(i); + EXPECT_EQ(*InsertResult.first, i); + EXPECT_EQ(InsertResult.second, true); + } - for (int i = 0; i < 4; i++) - s1.insert(i); + for (int i = 0; i < 4; i++) { + auto InsertResult = s1.insert(i); + EXPECT_EQ(*InsertResult.first, i); + EXPECT_EQ(InsertResult.second, false); + } EXPECT_EQ(4u, s1.size()); @@ -38,8 +44,17 @@ TEST(SmallSetTest, Grow) { SmallSet s1; - for (int i = 0; i < 8; i++) - s1.insert(i); + for (int i = 0; i < 8; i++) { + auto InsertResult = s1.insert(i); + EXPECT_EQ(*InsertResult.first, i); + EXPECT_EQ(InsertResult.second, true); + } + + for (int i = 0; i < 8; i++) { + auto InsertResult = s1.insert(i); + EXPECT_EQ(*InsertResult.first, i); + EXPECT_EQ(InsertResult.second, false); + } EXPECT_EQ(8u, s1.size()); diff --git a/llvm/unittests/CodeGen/MFCommon.inc b/llvm/unittests/CodeGen/MFCommon.inc --- a/llvm/unittests/CodeGen/MFCommon.inc +++ b/llvm/unittests/CodeGen/MFCommon.inc @@ -116,8 +116,9 @@ BogusSubtarget ST; }; -std::unique_ptr createTargetMachine() { - return std::make_unique(); +BogusTargetMachine *createTargetMachine() { + static BogusTargetMachine BogusTM; + return &BogusTM; } std::unique_ptr createMachineFunction(LLVMContext &Ctx, @@ -127,7 +128,7 @@ auto TM = createTargetMachine(); unsigned FunctionNum = 42; - MachineModuleInfo MMI(TM.get()); + MachineModuleInfo MMI(TM); const TargetSubtargetInfo &STI = *TM->getSubtargetImpl(*F); return std::make_unique(*F, *TM, STI, FunctionNum, MMI); diff --git 
a/llvm/unittests/CodeGen/MachineOperandTest.cpp b/llvm/unittests/CodeGen/MachineOperandTest.cpp --- a/llvm/unittests/CodeGen/MachineOperandTest.cpp +++ b/llvm/unittests/CodeGen/MachineOperandTest.cpp @@ -7,6 +7,11 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/ADT/ilist_node.h" #include "llvm/IR/Constants.h" #include "llvm/IR/InstrTypes.h" @@ -15,14 +20,19 @@ #include "llvm/IR/ModuleSlotTracker.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/TargetRegistry.h" #include "llvm/Support/LowLevelTypeImpl.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" #include "gtest/gtest.h" using namespace llvm; namespace { +// Include helper functions to ease the manipulation of MachineFunctions. +#include "MFCommon.inc" + TEST(MachineOperandTest, ChangeToTargetIndexTest) { // Creating a MachineOperand to change it to TargetIndex MachineOperand MO = MachineOperand::CreateImm(50); @@ -46,13 +56,17 @@ } TEST(MachineOperandTest, PrintRegisterMask) { - uint32_t Dummy; - MachineOperand MO = MachineOperand::CreateRegMask(&Dummy); + LLVMContext Ctx; + Module Mod("Module", Ctx); + auto MF = createMachineFunction(Ctx, Mod); + + uint32_t *Dummy = MF->allocateRegMask(); + MachineOperand MO = MachineOperand::CreateRegMask(Dummy); // Checking some preconditions on the newly created // MachineOperand. ASSERT_TRUE(MO.isRegMask()); - ASSERT_TRUE(MO.getRegMask() == &Dummy); + ASSERT_TRUE(MO.getRegMask() == Dummy); // Print a MachineOperand containing a RegMask. Here we check that without a // TRI and IntrinsicInfo we still print a less detailed regmask. 
diff --git a/llvm/utils/TableGen/CMakeLists.txt b/llvm/utils/TableGen/CMakeLists.txt --- a/llvm/utils/TableGen/CMakeLists.txt +++ b/llvm/utils/TableGen/CMakeLists.txt @@ -2,7 +2,9 @@ set(LLVM_LINK_COMPONENTS Support) -add_tablegen(llvm-tblgen LLVM DESTINATION "${LLVM_TOOLS_INSTALL_DIR}" +add_tablegen(llvm-tblgen LLVM + DESTINATION "${LLVM_TOOLS_INSTALL_DIR}" + EXPORT LLVM AsmMatcherEmitter.cpp AsmWriterEmitter.cpp AsmWriterInst.cpp diff --git a/llvm/utils/TableGen/CodeGenIntrinsics.h b/llvm/utils/TableGen/CodeGenIntrinsics.h --- a/llvm/utils/TableGen/CodeGenIntrinsics.h +++ b/llvm/utils/TableGen/CodeGenIntrinsics.h @@ -154,6 +154,7 @@ NoCapture, NoAlias, NoUndef, + NonNull, Returned, ReadOnly, WriteOnly, diff --git a/llvm/utils/TableGen/CodeGenTarget.cpp b/llvm/utils/TableGen/CodeGenTarget.cpp --- a/llvm/utils/TableGen/CodeGenTarget.cpp +++ b/llvm/utils/TableGen/CodeGenTarget.cpp @@ -893,6 +893,9 @@ } else if (R->isSubClassOf("NoUndef")) { unsigned ArgNo = R->getValueAsInt("ArgNo"); ArgumentAttributes.emplace_back(ArgNo, NoUndef, 0); + } else if (R->isSubClassOf("NonNull")) { + unsigned ArgNo = R->getValueAsInt("ArgNo"); + ArgumentAttributes.emplace_back(ArgNo, NonNull, 0); } else if (R->isSubClassOf("Returned")) { unsigned ArgNo = R->getValueAsInt("ArgNo"); ArgumentAttributes.emplace_back(ArgNo, Returned, 0); diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp --- a/llvm/utils/TableGen/IntrinsicEmitter.cpp +++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp @@ -700,11 +700,13 @@ unsigned numAttrs = 0; // The argument attributes are alreadys sorted by argument index. 
+ assert(is_sorted(Intrinsic.ArgumentAttributes) && + "Argument attributes are not sorted"); + unsigned Ai = 0, Ae = Intrinsic.ArgumentAttributes.size(); if (Ae) { while (Ai != Ae) { unsigned AttrIdx = Intrinsic.ArgumentAttributes[Ai].Index; - OS << " const Attribute::AttrKind AttrParam" << AttrIdx << "[]= {"; ListSeparator LS(","); @@ -721,6 +723,9 @@ case CodeGenIntrinsic::NoUndef: OS << LS << "Attribute::NoUndef"; break; + case CodeGenIntrinsic::NonNull: + OS << LS << "Attribute::NonNull"; + break; case CodeGenIntrinsic::Returned: OS << LS << "Attribute::Returned"; break; @@ -756,7 +761,8 @@ OS << LSV << V; OS << "};\n"; } - + // AttributeList::ReturnIndex = 0, AttrParam0 corresponds to return + // value. OS << " AS[" << numAttrs++ << "] = AttributeList::get(C, " << AttrIdx << ", AttrParam" << AttrIdx; if (!AllValuesAreZero) diff --git a/llvm/utils/UnicodeData/UnicodeNameMappingGenerator.cpp b/llvm/utils/UnicodeData/UnicodeNameMappingGenerator.cpp --- a/llvm/utils/UnicodeData/UnicodeNameMappingGenerator.cpp +++ b/llvm/utils/UnicodeData/UnicodeNameMappingGenerator.cpp @@ -361,7 +361,7 @@ char32_t Codepoint = Entry.first; const std::string &Name = Entry.second; // Ignore names which are not valid. 
- if (Name.empty() || !std::all_of(Name.begin(), Name.end(), [](char C) { + if (Name.empty() || !llvm::all_of(Name, [](char C) { return llvm::is_contained(Letters, C); })) { continue; diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/BUILD.gn --- a/llvm/utils/gn/secondary/compiler-rt/lib/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/BUILD.gn @@ -7,7 +7,10 @@ deps += [ "//compiler-rt/lib/msan" ] } if (current_os == "linux" || current_os == "android") { - deps += [ "//compiler-rt/lib/ubsan_minimal" ] + deps += [ + "//compiler-rt/lib/ubsan", + "//compiler-rt/lib/ubsan_minimal", + ] } if (current_os != "win" && current_os != "baremetal") { deps += [ "//compiler-rt/lib/asan" ] diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/ubsan/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/ubsan/BUILD.gn --- a/llvm/utils/gn/secondary/compiler-rt/lib/ubsan/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/ubsan/BUILD.gn @@ -1,3 +1,12 @@ +import("//compiler-rt/target.gni") + +group("ubsan") { + deps = [ + ":ubsan_standalone", + ":ubsan_standalone_cxx", + ] +} + source_set("sources") { configs -= [ "//llvm/utils/gn/build:llvm_code" ] configs += [ "//llvm/utils/gn/build:crt_code" ] @@ -46,7 +55,6 @@ sources = [ "ubsan_win_dynamic_runtime_thunk.cpp" ] } -# Unreferenced; at the moment exists to make sync_source_lists_from_cmake happy. source_set("standalone_sources") { configs -= [ "//llvm/utils/gn/build:llvm_code" ] configs -= [ "//llvm/utils/gn/build:no_rtti" ] @@ -57,6 +65,11 @@ "ubsan_init_standalone.cpp", "ubsan_signals_standalone.cpp", ] + deps = [ + ":sources", + "//compiler-rt/lib/interception:sources", + "//compiler-rt/lib/sanitizer_common:sources", + ] } source_set("cxx_sources") { @@ -72,3 +85,34 @@ "ubsan_type_hash_win.cpp", ] } + +# FIXME: Make ubsan_standalone work on mac. 
+if (current_os != "mac") { + static_library("ubsan_standalone") { + output_dir = crt_current_out_dir + output_name = "clang_rt.ubsan_standalone$crt_current_target_suffix" + complete_static_lib = true + configs -= [ + "//llvm/utils/gn/build:llvm_code", + "//llvm/utils/gn/build:thin_archive", + ] + deps = [ + ":sources", + ":standalone_sources", + ] + configs += [ "//llvm/utils/gn/build:crt_code" ] + sources = [ "ubsan_init_standalone_preinit.cpp" ] + } + + static_library("ubsan_standalone_cxx") { + output_dir = crt_current_out_dir + output_name = "clang_rt.ubsan_standalone_cxx$crt_current_target_suffix" + complete_static_lib = true + configs -= [ + "//llvm/utils/gn/build:llvm_code", + "//llvm/utils/gn/build:thin_archive", + ] + deps = [ ":cxx_sources" ] + configs += [ "//llvm/utils/gn/build:crt_code" ] + } +} diff --git a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/JITLink/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/JITLink/BUILD.gn --- a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/JITLink/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/JITLink/BUILD.gn @@ -25,6 +25,7 @@ "ELF.cpp", "ELFLinkGraphBuilder.cpp", "ELF_aarch64.cpp", + "ELF_i386.cpp", "ELF_riscv.cpp", "ELF_x86_64.cpp", "JITLink.cpp", @@ -36,6 +37,7 @@ "MachO_x86_64.cpp", "MemoryFlags.cpp", "aarch64.cpp", + "i386.cpp", "riscv.cpp", "x86_64.cpp", ] diff --git a/llvm/utils/gn/secondary/llvm/tools/llvm-reduce/BUILD.gn b/llvm/utils/gn/secondary/llvm/tools/llvm-reduce/BUILD.gn --- a/llvm/utils/gn/secondary/llvm/tools/llvm-reduce/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/tools/llvm-reduce/BUILD.gn @@ -40,6 +40,7 @@ "deltas/ReduceRegisterMasks.cpp", "deltas/ReduceRegisterUses.cpp", "deltas/ReduceSpecialGlobals.cpp", + "deltas/ReduceUsingSimplifyCFG.cpp", "deltas/ReduceVirtualRegisters.cpp", "deltas/RunIRPasses.cpp", "deltas/SimplifyInstructions.cpp", diff --git a/llvm/utils/release/test-release.sh b/llvm/utils/release/test-release.sh --- 
a/llvm/utils/release/test-release.sh +++ b/llvm/utils/release/test-release.sh @@ -35,6 +35,7 @@ do_libunwind="yes" do_test_suite="yes" do_openmp="yes" +do_bolt="no" do_lld="yes" do_lldb="yes" do_polly="yes" @@ -163,6 +164,12 @@ -no-openmp ) do_openmp="no" ;; + -bolt ) + do_bolt="yes" + ;; + -no-bolt ) + do_bolt="no" + ;; -no-lld ) do_lld="no" ;; @@ -265,6 +272,9 @@ if [ $do_openmp = "yes" ]; then projects="$projects openmp" fi +if [ $do_bolt = "yes" ]; then + projects="$projects bolt" +fi if [ $do_lld = "yes" ]; then projects="$projects lld" fi diff --git a/mlir/cmake/modules/AddMLIR.cmake b/mlir/cmake/modules/AddMLIR.cmake --- a/mlir/cmake/modules/AddMLIR.cmake +++ b/mlir/cmake/modules/AddMLIR.cmake @@ -159,7 +159,7 @@ " filepath: \"${LLVM_TARGET_DEFINITIONS_ABSOLUTE}\"\n" " includes: \"${CMAKE_CURRENT_SOURCE_DIR};${tblgen_includes}\"\n" ) - + add_public_tablegen_target(${target}) endfunction() @@ -490,6 +490,17 @@ ${ARG_PUBLIC_LIBS} ) target_sources(${name} PRIVATE ${_objects}) + + # Linux defaults to allowing undefined symbols in shared libraries whereas + # many other platforms are more strict. We want these libraries to be + # self contained, and we want any undefined symbols to be reported at + # library construction time, not at library use, so make Linux strict too. + if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + target_link_options(${name} PRIVATE + "LINKER:-z,defs" + ) + endif() + # TODO: Should be transitive. set_target_properties(${name} PROPERTIES MLIR_AGGREGATE_EXCLUDE_LIBS "${_embed_libs}") diff --git a/mlir/include/mlir/Analysis/DataFlow/DeadCodeAnalysis.h b/mlir/include/mlir/Analysis/DataFlow/DeadCodeAnalysis.h --- a/mlir/include/mlir/Analysis/DataFlow/DeadCodeAnalysis.h +++ b/mlir/include/mlir/Analysis/DataFlow/DeadCodeAnalysis.h @@ -41,9 +41,6 @@ /// The state is initialized by default. bool isUninitialized() const override { return false; } - /// The state is always initialized. 
- ChangeResult defaultInitialize() override { return ChangeResult::NoChange; } - /// Set the state of the program point to live. ChangeResult setToLive(); @@ -101,9 +98,6 @@ /// The state is initialized by default. bool isUninitialized() const override { return false; } - /// The state is always initialized. - ChangeResult defaultInitialize() override { return ChangeResult::NoChange; } - /// Print the known predecessors. void print(raw_ostream &os) const override; diff --git a/mlir/include/mlir/Analysis/DataFlow/DenseAnalysis.h b/mlir/include/mlir/Analysis/DataFlow/DenseAnalysis.h --- a/mlir/include/mlir/Analysis/DataFlow/DenseAnalysis.h +++ b/mlir/include/mlir/Analysis/DataFlow/DenseAnalysis.h @@ -42,10 +42,6 @@ /// Reset the dense lattice to a pessimistic value. This occurs when the /// analysis cannot reason about the data-flow. virtual ChangeResult reset() = 0; - - /// Returns true if the lattice state has reached a pessimistic fixpoint. That - /// is, no further modifications to the lattice can occur. - virtual bool isAtFixpoint() const = 0; }; //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Analysis/DataFlow/SparseAnalysis.h b/mlir/include/mlir/Analysis/DataFlow/SparseAnalysis.h --- a/mlir/include/mlir/Analysis/DataFlow/SparseAnalysis.h +++ b/mlir/include/mlir/Analysis/DataFlow/SparseAnalysis.h @@ -38,10 +38,6 @@ /// if the value of the lattice changed. virtual ChangeResult join(const AbstractSparseLattice &rhs) = 0; - /// Returns true if the lattice element is at fixpoint and further calls to - /// `join` will not update the value of the element. - virtual bool isAtFixpoint() const = 0; - /// Mark the lattice element as having reached a pessimistic fixpoint. This /// means that the lattice may potentially have conflicting value states, and /// only the most conservative value should be relied on. @@ -97,16 +93,6 @@ /// Returns true if the value of this lattice hasn't yet been initialized. 
bool isUninitialized() const override { return !optimisticValue.has_value(); } - /// Force the initialization of the element by setting it to its pessimistic - /// fixpoint. - ChangeResult defaultInitialize() override { - return markPessimisticFixpoint(); - } - - /// Returns true if the lattice has reached a fixpoint. A fixpoint is when - /// the information optimistically assumed to be true is the same as the - /// information known to be true. - bool isAtFixpoint() const override { return optimisticValue == knownValue; } /// Join the information contained in the 'rhs' lattice into this /// lattice. Returns if the state of the current lattice changed. @@ -114,8 +100,8 @@ const Lattice &rhsLattice = static_cast &>(rhs); - // If we are at a fixpoint, or rhs is uninitialized, there is nothing to do. - if (isAtFixpoint() || rhsLattice.isUninitialized()) + // If rhs is uninitialized, there is nothing to do. + if (rhsLattice.isUninitialized()) return ChangeResult::NoChange; // Join the rhs value into this lattice. @@ -150,7 +136,7 @@ /// means that the lattice may potentially have conflicting value states, /// and only the conservatively known value state should be relied on. ChangeResult markPessimisticFixpoint() override { - if (isAtFixpoint()) + if (optimisticValue == knownValue) return ChangeResult::NoChange; // For this fixed point, we take whatever we knew to be true and set that diff --git a/mlir/include/mlir/Analysis/DataFlowFramework.h b/mlir/include/mlir/Analysis/DataFlowFramework.h --- a/mlir/include/mlir/Analysis/DataFlowFramework.h +++ b/mlir/include/mlir/Analysis/DataFlowFramework.h @@ -291,10 +291,6 @@ /// Returns true if the analysis state is uninitialized. virtual bool isUninitialized() const = 0; - /// Force an uninitialized analysis state to initialize itself with a default - /// value. - virtual ChangeResult defaultInitialize() = 0; - /// Print the contents of the analysis state. 
virtual void print(raw_ostream &os) const = 0; diff --git a/mlir/include/mlir/Conversion/MathToLibm/MathToLibm.h b/mlir/include/mlir/Conversion/MathToLibm/MathToLibm.h --- a/mlir/include/mlir/Conversion/MathToLibm/MathToLibm.h +++ b/mlir/include/mlir/Conversion/MathToLibm/MathToLibm.h @@ -15,8 +15,10 @@ class OperationPass; /// Populate the given list with patterns that convert from Math to Libm calls. -void populateMathToLibmConversionPatterns(RewritePatternSet &patterns, - PatternBenefit benefit); +/// If log1pBenefit is present, use it instead of benefit for the Log1p op. +void populateMathToLibmConversionPatterns( + RewritePatternSet &patterns, PatternBenefit benefit, + llvm::Optional log1pBenefit = llvm::None); /// Create a pass to convert Math operations to libm calls. std::unique_ptr> createConvertMathToLibmPass(); diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h --- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h @@ -369,7 +369,10 @@ /// to /// /// %iv = %lb + %procId * %step - CyclicNumProcsEqNumIters = 2 + CyclicNumProcsEqNumIters = 2, + + /// No Distribution. + None = 3 }; /// Callback function type used to get processor ID, and number of processors @@ -377,11 +380,10 @@ struct ProcInfo { Value procId; Value nprocs; + DistributionMethod distributionMethod; }; -using ProcInfoCallBackFn = std::function( +using ProcInfoCallBackFn = std::function( OpBuilder &b, Location loc, ArrayRef parallelLoopRanges)>; -using OneDimProcInfoCallBackFn = - std::function; /// Options that allow distribution of loops generated in Linalg transforms to /// processors while generating the loops. @@ -389,21 +391,10 @@ /// Callback function that returns the Values for processor ID (`procId`), and /// number of processors (`nprocs`) used to execute the parallel loops. 
The /// number of `{procId, nprocs}` pairs returned must be equal to the number of - /// `parallelLoopRanges` passed into the callback, which in-turn is same as - /// the number of parallel loops for which the `distributionMethod` is - /// specified below. + /// `parallelLoopRanges` passed into the callback. The `parallelLoopRanges` + /// are ranges of the outer parallel loops of the operation that + /// do have non-zero tile sizes specified. ProcInfoCallBackFn procInfo; - /// Specification of how to distribute the `scf.parallel` loops that are - /// generated. As the `scf.parallel` loop is generated, the elements of this - /// vector is used (from left to right) and the specified distribution is - /// applied. If the vector is less than the number of `scf.parallel` loops - /// generated, then no distribution is applied. - SmallVector distributionMethod = {}; - - /// The map keyed by the distribution type that contains callback functions - /// that return the Values for processor ID (`procId`), and number of - /// processors (`nprocs`) used to execute the parallel loops. - DenseMap procInfoMap; }; /// Update the `lb`, `ub` and `step` to get per processor `lb`, `ub` and `step`. 
@@ -521,8 +512,7 @@ function_ref bodyBuilderFn, - Optional = None, - ArrayRef distributionTypes = {}); + ArrayRef procInfo = {}); }; } // namespace linalg diff --git a/mlir/include/mlir/Dialect/Math/IR/MathOps.td b/mlir/include/mlir/Dialect/Math/IR/MathOps.td --- a/mlir/include/mlir/Dialect/Math/IR/MathOps.td +++ b/mlir/include/mlir/Dialect/Math/IR/MathOps.td @@ -538,6 +538,7 @@ %a = math.ipowi %b, %c : i32 ``` }]; + let hasFolder = 1; } //===----------------------------------------------------------------------===// @@ -769,4 +770,51 @@ }]; } +//===----------------------------------------------------------------------===// +// FPowIOp +//===----------------------------------------------------------------------===// + +def Math_FPowIOp : Math_Op<"fpowi", + [SameOperandsAndResultShape, AllTypesMatch<["lhs", "result"]>]> { + let summary = "floating point raised to the signed integer power"; + let description = [{ + Syntax: + + ``` + operation ::= ssa-id `=` `math.fpowi` ssa-use `,` ssa-use `:` type + ``` + + The `fpowi` operation takes a `base` operand of floating point type + (i.e. scalar, tensor or vector) and a `power` operand of integer type + (also scalar, tensor or vector) and returns one result of the same type + as `base`. The result is `base` raised to the power of `power`. + The operation is elementwise for non-scalars, e.g.: + + ```mlir + %v = math.fpowi %base, %power : vector<2xf32>, vector<2xi32 + ``` + + The result is a vector of: + + ``` + [, ] + ``` + + Example: + + ```mlir + // Scalar exponentiation. + %a = math.fpowi %base, %power : f64, i32 + ``` + }]; + + let arguments = (ins FloatLike:$lhs, SignlessIntegerLike:$rhs); + let results = (outs FloatLike:$result); + let assemblyFormat = "$lhs `,` $rhs attr-dict `:` type($lhs) `,` type($rhs)"; + + // TODO: add a constant folder using pow[f] for cases, when + // the power argument is exactly representable in floating + // point type of the base. 
+} + #endif // MATH_OPS diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVArithmeticOps.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVArithmeticOps.td --- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVArithmeticOps.td +++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVArithmeticOps.td @@ -27,12 +27,12 @@ // In addition to normal types arithmetic instructions can support cooperative // matrix. let arguments = (ins - SPV_ScalarOrVectorOrCoopMatrixOf:$operand1, - SPV_ScalarOrVectorOrCoopMatrixOf:$operand2 + SPV_ScalarOrVectorOrCoopMatrixOfOrJointMatrixOf:$operand1, + SPV_ScalarOrVectorOrCoopMatrixOfOrJointMatrixOf:$operand2 ); let results = (outs - SPV_ScalarOrVectorOrCoopMatrixOf:$result + SPV_ScalarOrVectorOrCoopMatrixOfOrJointMatrixOf:$result ); let assemblyFormat = "operands attr-dict `:` type($result)"; } diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVAttributes.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVAttributes.td --- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVAttributes.td +++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVAttributes.td @@ -64,6 +64,27 @@ TypedArrayAttrBase; +// Description of the supported joint matrix operations. See +// https://github.com/intel/llvm/blob/sycl/sycl/doc/design/spirv-extensions/SPV_INTEL_joint_matrix.asciidoc +def SPV_JointMatrixPropertiesINTELAttr : + SPV_Attr<"JointMatrixPropertiesINTEL", "joint_matrix_props"> { + let parameters = (ins + "int":$m_size, + "int":$n_size, + "int":$k_size, + "mlir::Type":$a_type, + "mlir::Type":$b_type, + "mlir::Type":$c_type, + "mlir::Type":$result_type, + "mlir::spirv::ScopeAttr":$scope + ); + let assemblyFormat = "`<` struct(params) `>`"; +} + +def SPV_JointMatrixPropertiesINTELArrayAttr : + TypedArrayAttrBase; + // This attribute specifies the limits for various resources on the target // architecture. 
// diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td --- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td +++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td @@ -387,6 +387,7 @@ def SPV_INTEL_fp_fast_math_mode : I32EnumAttrCase<"SPV_INTEL_fp_fast_math_mode", 4027>; def SPV_INTEL_memory_access_aliasing : I32EnumAttrCase<"SPV_INTEL_memory_access_aliasing", 4028>; def SPV_INTEL_split_barrier : I32EnumAttrCase<"SPV_INTEL_split_barrier", 4029>; +def SPV_INTEL_joint_matrix : I32EnumAttrCase<"SPV_INTEL_joint_matrix", 4030>; def SPV_NV_compute_shader_derivatives : I32EnumAttrCase<"SPV_NV_compute_shader_derivatives", 5000>; def SPV_NV_cooperative_matrix : I32EnumAttrCase<"SPV_NV_cooperative_matrix", 5001>; @@ -443,7 +444,7 @@ SPV_INTEL_usm_storage_classes, SPV_INTEL_io_pipes, SPV_INTEL_blocking_pipes, SPV_INTEL_fpga_reg, SPV_INTEL_long_constant_composite, SPV_INTEL_optnone, SPV_INTEL_debug_module, SPV_INTEL_fp_fast_math_mode, - SPV_INTEL_memory_access_aliasing, SPV_INTEL_split_barrier, + SPV_INTEL_memory_access_aliasing, SPV_INTEL_split_barrier, SPV_INTEL_joint_matrix, SPV_NV_compute_shader_derivatives, SPV_NV_cooperative_matrix, SPV_NV_fragment_shader_barycentric, SPV_NV_geometry_shader_passthrough, SPV_NV_mesh_shader, SPV_NV_ray_tracing, SPV_NV_sample_mask_override_coverage, @@ -1390,6 +1391,12 @@ ]; } +def SPV_C_JointMatrixINTEL : I32EnumAttrCase<"JointMatrixINTEL", 6118> { + list availability = [ + Extension<[SPV_INTEL_joint_matrix]> + ]; +} + def SPV_CapabilityAttr : SPV_I32EnumAttr<"Capability", "valid SPIR-V Capability", "capability", [ SPV_C_Matrix, SPV_C_Addresses, SPV_C_Linkage, SPV_C_Kernel, SPV_C_Float16, @@ -1481,7 +1488,7 @@ SPV_C_UniformTexelBufferArrayNonUniformIndexing, SPV_C_StorageTexelBufferArrayNonUniformIndexing, SPV_C_ShaderViewportIndexLayerEXT, SPV_C_ShaderViewportMaskNV, - SPV_C_ShaderStereoViewNV + SPV_C_ShaderStereoViewNV, SPV_C_JointMatrixINTEL ]>; def SPV_AM_Logical : 
I32EnumAttrCase<"Logical", 0>; @@ -3981,6 +3988,16 @@ "image_sampler_use_info", [SPV_ISUI_SamplerUnknown, SPV_ISUI_NeedSampler, SPV_ISUI_NoSampler]>; +def SPV_ML_ColumnMajor : I32EnumAttrCase<"ColumnMajor", 0>; +def SPV_ML_RowMajor : I32EnumAttrCase<"RowMajor", 1>; +def SPV_ML_PackedA : I32EnumAttrCase<"PackedA", 2>; +def SPV_ML_PackedB : I32EnumAttrCase<"PackedB", 3>; + +def SPV_MatrixLayoutAttr : + SPV_I32EnumAttr<"MatrixLayout", "valid SPIR-V MatrixLayout", "matrixLayout", [ + SPV_ML_ColumnMajor, SPV_ML_RowMajor, SPV_ML_PackedA, SPV_ML_PackedB + ]>; + //===----------------------------------------------------------------------===// // SPIR-V attribute definitions //===----------------------------------------------------------------------===// @@ -4013,6 +4030,8 @@ def SPV_IsCooperativeMatrixType : CPred<"$_self.isa<::mlir::spirv::CooperativeMatrixNVType>()">; def SPV_IsImageType : CPred<"$_self.isa<::mlir::spirv::ImageType>()">; +def SPV_IsJointMatrixType : + CPred<"$_self.isa<::mlir::spirv::JointMatrixINTELType>()">; def SPV_IsMatrixType : CPred<"$_self.isa<::mlir::spirv::MatrixType>()">; def SPV_IsPtrType : CPred<"$_self.isa<::mlir::spirv::PointerType>()">; def SPV_IsRTArrayType : CPred<"$_self.isa<::mlir::spirv::RuntimeArrayType>()">; @@ -4043,6 +4062,8 @@ "any SPIR-V cooperative matrix type">; def SPV_AnyImage : DialectType; +def SPV_AnyJointMatrix : DialectType; def SPV_AnyMatrix : DialectType; def SPV_AnyRTArray : DialectType; def SPV_Composite : AnyTypeOf<[SPV_Vector, SPV_AnyArray, SPV_AnyRTArray, SPV_AnyStruct, - SPV_AnyCooperativeMatrix, SPV_AnyMatrix]>; + SPV_AnyCooperativeMatrix, SPV_AnyJointMatrix, SPV_AnyMatrix]>; def SPV_Type : AnyTypeOf<[ SPV_Void, SPV_Bool, SPV_Integer, SPV_Float, SPV_Vector, SPV_AnyPtr, SPV_AnyArray, SPV_AnyRTArray, SPV_AnyStruct, - SPV_AnyCooperativeMatrix, SPV_AnyMatrix, SPV_AnySampledImage + SPV_AnyCooperativeMatrix, SPV_AnyJointMatrix, SPV_AnyMatrix, + SPV_AnySampledImage ]>; def SPV_SignedInt : SignedIntOfWidths<[8, 16, 32, 
64]>; @@ -4072,6 +4094,11 @@ "$_self.cast<::mlir::spirv::CooperativeMatrixNVType>().getElementType()", "Cooperative Matrix">; +class SPV_JointMatrixOfType allowedTypes> : + ContainerType, SPV_IsJointMatrixType, + "$_self.cast<::mlir::spirv::JointMatrixINTELType>().getElementType()", + "Joint Matrix">; + class SPV_ScalarOrVectorOf : AnyTypeOf<[type, VectorOfLengthAndType<[2, 3, 4, 8, 16], [type]>]>; @@ -4079,6 +4106,14 @@ AnyTypeOf<[type, VectorOfLengthAndType<[2, 3, 4, 8, 16], [type]>, SPV_CoopMatrixOfType<[type]>]>; +class SPV_ScalarOrVectorOrJointMatrixOf : + AnyTypeOf<[type, VectorOfLengthAndType<[2, 3, 4, 8, 16], [type]>, + SPV_JointMatrixOfType<[type]>]>; + +class SPV_ScalarOrVectorOrCoopMatrixOfOrJointMatrixOf : + AnyTypeOf<[type, VectorOfLengthAndType<[2, 3, 4, 8, 16], [type]>, + SPV_CoopMatrixOfType<[type]>, SPV_JointMatrixOfType<[type]> ]>; + def SPV_ScalarOrVector : AnyTypeOf<[SPV_Scalar, SPV_Vector]>; def SPV_ScalarOrVectorOrPtr : AnyTypeOf<[SPV_ScalarOrVector, SPV_AnyPtr]>; @@ -4311,6 +4346,11 @@ def SPV_OC_OpSubgroupBlockWriteINTEL : I32EnumAttrCase<"OpSubgroupBlockWriteINTEL", 5576>; def SPV_OC_OpAssumeTrueKHR : I32EnumAttrCase<"OpAssumeTrueKHR", 5630>; def SPV_OC_OpAtomicFAddEXT : I32EnumAttrCase<"OpAtomicFAddEXT", 6035>; +def SPV_OC_OpTypeJointMatrixINTEL : I32EnumAttrCase<"OpTypeJointMatrixINTEL", 6119>; +def SPV_OC_OpJointMatrixLoadINTEL : I32EnumAttrCase<"OpJointMatrixLoadINTEL", 6120>; +def SPV_OC_OpJointMatrixStoreINTEL : I32EnumAttrCase<"OpJointMatrixStoreINTEL", 6121>; +def SPV_OC_OpJointMatrixMadINTEL : I32EnumAttrCase<"OpJointMatrixMadINTEL", 6122>; +def SPV_OC_OpTypejointMatrixWorkItemLengthINTEL : I32EnumAttrCase<"OpJointMatrixWorkItemLengthINTEL", 6410>; def SPV_OpcodeAttr : SPV_I32EnumAttr<"Opcode", "valid SPIR-V instructions", "opcode", [ @@ -4376,7 +4416,10 @@ SPV_OC_OpCooperativeMatrixLoadNV, SPV_OC_OpCooperativeMatrixStoreNV, SPV_OC_OpCooperativeMatrixMulAddNV, SPV_OC_OpCooperativeMatrixLengthNV, SPV_OC_OpSubgroupBlockReadINTEL, 
SPV_OC_OpSubgroupBlockWriteINTEL, - SPV_OC_OpAssumeTrueKHR, SPV_OC_OpAtomicFAddEXT + SPV_OC_OpAssumeTrueKHR, SPV_OC_OpAtomicFAddEXT, + SPV_OC_OpTypeJointMatrixINTEL, SPV_OC_OpJointMatrixLoadINTEL, + SPV_OC_OpJointMatrixStoreINTEL, SPV_OC_OpJointMatrixMadINTEL, + SPV_OC_OpTypejointMatrixWorkItemLengthINTEL ]>; // End opcode section. Generated from SPIR-V spec; DO NOT MODIFY! diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVCastOps.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVCastOps.td --- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVCastOps.td +++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVCastOps.td @@ -23,11 +23,11 @@ !listconcat(traits, [NoSideEffect, SameOperandsAndResultShape])> { let arguments = (ins - SPV_ScalarOrVectorOrCoopMatrixOf:$operand + SPV_ScalarOrVectorOrCoopMatrixOfOrJointMatrixOf:$operand ); let results = (outs - SPV_ScalarOrVectorOrCoopMatrixOf:$result + SPV_ScalarOrVectorOrCoopMatrixOfOrJointMatrixOf:$result ); let assemblyFormat = [{ $operand attr-dict `:` type($operand) `to` type($result) diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVJointMatrixOps.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVJointMatrixOps.td new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVJointMatrixOps.td @@ -0,0 +1,248 @@ +//===- SPIRVJointMatrixOps.td - joint matmul ---------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is the op definition spec of joint matrix multiply extension ops. 
+// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_SPIRV_IR_JOINT_MATRIX_OPS +#define MLIR_DIALECT_SPIRV_IR_JOINT_MATRIX_OPS + +// ----- + +def SPV_JointMatrixWorkItemLengthINTELOp : SPV_Op<"JointMatrixWorkItemLengthINTEL", + [NoSideEffect]> { + let summary = "See extension SPV_INTEL_joint_matrix"; + + let description = [{ + Return number of components owned by the current work-item in + a joint matrix. + + Result Type must be an 32-bit unsigned integer type scalar. + + Type is a joint matrix type. + + ``` {.ebnf} + joint-matrix-length-op ::= ssa-id `=` `spv.JointMatrixWorkItemLengthINTEL + ` : ` joint-matrix-type + ``` + + For example: + + ``` + %0 = spv.JointMatrixWorkItemLengthINTEL : !spv.jointmatrix + ``` + }]; + + let assemblyFormat = "attr-dict `:` $type"; + + let availability = [ + MinVersion, + MaxVersion, + Extension<[SPV_INTEL_joint_matrix]>, + Capability<[SPV_C_JointMatrixINTEL]> + ]; + + let arguments = (ins + TypeAttr:$type + ); + + let results = (outs + SPV_Int32:$result + ); + let hasVerifier = 0; +} + +// ----- + +def SPV_JointMatrixLoadINTELOp : SPV_Op<"JointMatrixLoadINTEL", []> { + let summary = "See extension SPV_INTEL_joint_matrix"; + + let description = [{ + Load a matrix through a pointer. + + Result Type is the type of the loaded matrix. It must be OpTypeJointMatrixINTEL. + + Pointer is the pointer to load through. It specifies start of memory region where + elements of the matrix are stored and arranged according to Layout. + + Stride is the number of elements in memory between beginnings of successive rows, + columns (or words) in the result. It must be a scalar integer type. + + Layout indicates how the values loaded from memory are arranged. It must be the + result of a constant instruction. + + Scope is syncronization scope for operation on the matrix. It must be the result + of a constant instruction with scalar integer type. 
+ + If present, any Memory Operands must begin with a memory operand literal. If not + present, it is the same as specifying the memory operand None. + + #### Example: + ```mlir + %0 = spv.JointMatrixLoadINTEL %ptr, %stride + {memory_access = #spv.memory_access} : + (!spv.ptr, i32) -> + !spv.jointmatrix<8x16xi32, ColumnMajor, Subgroup> + ``` + }]; + + let assemblyFormat = [{ + $scope $layout operands attr-dict `:` `(` type(operands) `)` `->` type($result) + }]; + + let availability = [ + MinVersion, + MaxVersion, + Extension<[SPV_INTEL_joint_matrix]>, + Capability<[SPV_C_JointMatrixINTEL]> + ]; + + let arguments = (ins + SPV_ScopeAttr:$scope, + SPV_MatrixLayoutAttr:$layout, + SPV_AnyPtr:$pointer, + SPV_Integer:$stride, + OptionalAttr:$memory_access, + OptionalAttr:$alignment + ); + + let results = (outs + SPV_AnyJointMatrix:$result + ); +} + +// ----- + +def SPV_JointMatrixMadINTELOp : SPV_Op<"JointMatrixMadINTEL", + [NoSideEffect, AllTypesMatch<["c", "result"]>]> { + let summary = "See extension SPV_INTEL_joint_matrix"; + + let description = [{ + Multiply matrix A by matrix B and add matrix C to the result + of the multiplication: A*B+C. Here A is a M x K matrix, B is + a K x N matrix and C is a M x N matrix. + + Behavior is undefined if sizes of operands do not meet the + conditions above. All operands and the Result Type must be + OpTypeJointMatrixINTEL. + + A must be a OpTypeJointMatrixINTEL whose Component Type is a + signed numerical type, Row Count equals to M and Column Count + equals to K + + B must be a OpTypeJointMatrixINTEL whose Component Type is a + signed numerical type, Row Count equals to K and Column Count + equals to N + + C and Result Type must be a OpTypeJointMatrixINTEL with Row + Count equals to M and Column Count equals to N + + Scope is syncronization scope for operation on the matrix. + It must be the result of a constant instruction with scalar + integer type. 
+ + #### Example: + ```mlir + %r = spv.JointMatrixMadINTEL %a, %b, %c : + !spv.jointmatrix<8x32xi8, RowMajor, Subgroup>, + !spv.jointmatrix<32x8xi8, ColumnMajor, Subgroup> + -> !spv.jointmatrix<8x8xi32, RowMajor, Subgroup> + ``` + + }]; + + let assemblyFormat = [{ + $scope operands attr-dict`:` type($a) `,` type($b) `->` type($c) + }]; + + let availability = [ + MinVersion, + MaxVersion, + Extension<[SPV_INTEL_joint_matrix]>, + Capability<[SPV_C_JointMatrixINTEL]> + ]; + + let arguments = (ins + SPV_ScopeAttr:$scope, + SPV_AnyJointMatrix:$a, + SPV_AnyJointMatrix:$b, + SPV_AnyJointMatrix:$c + ); + + let results = (outs + SPV_AnyJointMatrix:$result + ); +} + +// ----- + +def SPV_JointMatrixStoreINTELOp : SPV_Op<"JointMatrixStoreINTEL", []> { + let summary = "See extension SPV_INTEL_joint_matrix"; + + let description = [{ + Store a matrix through a pointer. + + Pointer is the pointer to store through. It specifies + start of memory region where elements of the matrix must + be stored and arranged according to Layout. + + Object is the matrix to store. It must be + OpTypeJointMatrixINTEL. + + Stride is the number of elements in memory between beginnings + of successive rows, columns (or words) of the Object. It must + be a scalar integer type. + + Layout indicates how the values stored to memory are arranged. + It must be the result of a constant instruction. + + Scope is syncronization scope for operation on the matrix. + It must be the result of a constant instruction with scalar + integer type. + + If present, any Memory Operands must begin with a memory operand + literal. If not present, it is the same as specifying the memory + operand None. 
+ + #### Example: + ```mlir + spv.JointMatrixStoreINTEL %ptr, %m, %stride + {memory_access = #spv.memory_access} : (!spv.ptr, + !spv.jointmatrix<8x16xi32, RowMajor, Subgroup>, i32) + ``` + + }]; + + let assemblyFormat = [{ + $scope $layout operands attr-dict `:` `(` type(operands) `)` + }]; + + let availability = [ + MinVersion, + MaxVersion, + Extension<[SPV_INTEL_joint_matrix]>, + Capability<[SPV_C_JointMatrixINTEL]> + ]; + + let arguments = (ins + SPV_ScopeAttr:$scope, + SPV_MatrixLayoutAttr:$layout, + SPV_AnyPtr:$pointer, + SPV_AnyJointMatrix:$object, + SPV_Integer:$stride, + OptionalAttr:$memory_access, + OptionalAttr:$alignment + ); + + let results = (outs); +} + +// ----- + +#endif // MLIR_DIALECT_SPIRV_IR_JOINT_MATRIX_OPS diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVOps.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVOps.td --- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVOps.td +++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVOps.td @@ -30,6 +30,7 @@ include "mlir/Dialect/SPIRV/IR/SPIRVCompositeOps.td" include "mlir/Dialect/SPIRV/IR/SPIRVControlFlowOps.td" include "mlir/Dialect/SPIRV/IR/SPIRVCooperativeMatrixOps.td" +include "mlir/Dialect/SPIRV/IR/SPIRVJointMatrixOps.td" include "mlir/Dialect/SPIRV/IR/SPIRVGLOps.td" include "mlir/Dialect/SPIRV/IR/SPIRVGroupOps.td" include "mlir/Dialect/SPIRV/IR/SPIRVImageOps.td" diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVTypes.h b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVTypes.h --- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVTypes.h +++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVTypes.h @@ -29,6 +29,7 @@ struct ArrayTypeStorage; struct CooperativeMatrixTypeStorage; struct ImageTypeStorage; +struct JointMatrixTypeStorage; struct MatrixTypeStorage; struct PointerTypeStorage; struct RuntimeArrayTypeStorage; @@ -420,6 +421,33 @@ Optional storage = llvm::None); }; +// SPIR-V joint matrix type +class JointMatrixINTELType + : public Type::TypeBase { +public: + using Base::Base; + + static JointMatrixINTELType get(Type 
elementType, Scope scope, unsigned rows, + unsigned columns, MatrixLayout matrixLayout); + Type getElementType() const; + + /// Return the scope of the joint matrix. + Scope getScope() const; + /// return the number of rows of the matrix. + unsigned getRows() const; + /// return the number of columns of the matrix. + unsigned getColumns() const; + + /// return the layout of the matrix + MatrixLayout getMatrixLayout() const; + + void getExtensions(SPIRVType::ExtensionArrayRefVector &extensions, + Optional storage = llvm::None); + void getCapabilities(SPIRVType::CapabilityArrayRefVector &capabilities, + Optional storage = llvm::None); +}; + // SPIR-V matrix type class MatrixType : public Type::TypeBase { diff --git a/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h b/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h --- a/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h +++ b/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h @@ -36,6 +36,7 @@ #include #include #include +#include //===----------------------------------------------------------------------===// // Codegen-compatible structures for Vector type. 
@@ -209,13 +210,19 @@ template class StridedMemrefIterator { public: + using iterator_category = std::forward_iterator_tag; + using value_type = T; + using difference_type = std::ptrdiff_t; + using pointer = T *; + using reference = T &; + StridedMemrefIterator(StridedMemRefType &descriptor, int64_t offset = 0) - : offset(offset), descriptor(descriptor) {} + : offset(offset), descriptor(&descriptor) {} StridedMemrefIterator &operator++() { int dim = Rank - 1; - while (dim >= 0 && indices[dim] == (descriptor.sizes[dim] - 1)) { - offset -= indices[dim] * descriptor.strides[dim]; + while (dim >= 0 && indices[dim] == (descriptor->sizes[dim] - 1)) { + offset -= indices[dim] * descriptor->strides[dim]; indices[dim] = 0; --dim; } @@ -224,17 +231,17 @@ return *this; } ++indices[dim]; - offset += descriptor.strides[dim]; + offset += descriptor->strides[dim]; return *this; } - T &operator*() { return descriptor.data[offset]; } - T *operator->() { return &descriptor.data[offset]; } + reference operator*() { return descriptor->data[offset]; } + pointer operator->() { return &descriptor->data[offset]; } const std::array &getIndices() { return indices; } bool operator==(const StridedMemrefIterator &other) const { - return other.offset == offset && &other.descriptor == &descriptor; + return other.offset == offset && other.descriptor == descriptor; } bool operator!=(const StridedMemrefIterator &other) const { @@ -245,16 +252,24 @@ /// Offset in the buffer. This can be derived from the indices and the /// descriptor. int64_t offset = 0; + /// Array of indices in the multi-dimensional memref. std::array indices = {}; + /// Descriptor for the strided memref. - StridedMemRefType &descriptor; + StridedMemRefType *descriptor; }; /// Iterate over all elements in a 0-ranked strided memref. 
template class StridedMemrefIterator { public: + using iterator_category = std::forward_iterator_tag; + using value_type = T; + using difference_type = std::ptrdiff_t; + using pointer = T *; + using reference = T &; + StridedMemrefIterator(StridedMemRefType &descriptor, int64_t offset = 0) : elt(descriptor.data + offset) {} @@ -263,8 +278,8 @@ return *this; } - T &operator*() { return *elt; } - T *operator->() { return elt; } + reference operator*() { return *elt; } + pointer operator->() { return elt; } // There are no indices for a 0-ranked memref, but this API is provided for // consistency with the general case. @@ -301,10 +316,20 @@ //===----------------------------------------------------------------------===// // DynamicMemRefType type. //===----------------------------------------------------------------------===// +template +class DynamicMemRefIterator; + // A reference to one of the StridedMemRef types. template class DynamicMemRefType { public: + int64_t rank; + T *basePtr; + T *data; + int64_t offset; + const int64_t *sizes; + const int64_t *strides; + explicit DynamicMemRefType(const StridedMemRefType &memRef) : rank(0), basePtr(memRef.basePtr), data(memRef.data), offset(memRef.offset), sizes(nullptr), strides(nullptr) {} @@ -322,12 +347,113 @@ strides = sizes + rank; } - int64_t rank; - T *basePtr; - T *data; - int64_t offset; - const int64_t *sizes; - const int64_t *strides; + template ().begin())> + T &operator[](Range &&indices) { + assert(indices.size() == rank && + "indices should match rank in memref subscript"); + if (rank == 0) + return data[offset]; + + int64_t curOffset = offset; + for (int dim = rank - 1; dim >= 0; --dim) { + int64_t currentIndex = *(indices.begin() + dim); + assert(currentIndex < sizes[dim] && "Index overflow"); + curOffset += currentIndex * strides[dim]; + } + return data[curOffset]; + } + + DynamicMemRefIterator begin() { return {*this}; } + DynamicMemRefIterator end() { return {*this, -1}; } + + // This operator[] is 
extremely slow and only for sugaring purposes. + DynamicMemRefType operator[](int64_t idx) { + assert(rank > 0 && "can't make a subscript of a zero ranked array"); + + DynamicMemRefType res(*this); + --res.rank; + res.offset += idx * res.strides[0]; + ++res.sizes; + ++res.strides; + return res; + } + + // This operator* can be used in conjunction with the previous operator[] in + // order to access the underlying value in case of zero-ranked memref. + T &operator*() { + assert(rank == 0 && "not a zero-ranked memRef"); + return data[offset]; + } + +private: + DynamicMemRefType(const DynamicMemRefType &other) + : rank(other.rank), basePtr(other.basePtr), data(other.data), + offset(other.offset), strides(other.strides) {} +}; + +/// Iterate over all elements in a dynamic memref. +template +class DynamicMemRefIterator { +public: + using iterator_category = std::forward_iterator_tag; + using value_type = T; + using difference_type = std::ptrdiff_t; + using pointer = T *; + using reference = T &; + + DynamicMemRefIterator(DynamicMemRefType &descriptor, int64_t offset = 0) + : offset(offset), descriptor(&descriptor) { + indices.resize(descriptor.rank, 0); + } + + DynamicMemRefIterator &operator++() { + if (descriptor->rank == 0) { + offset = -1; + return *this; + } + + int dim = descriptor->rank - 1; + + while (dim >= 0 && indices[dim] == (descriptor->sizes[dim] - 1)) { + offset -= indices[dim] * descriptor->strides[dim]; + indices[dim] = 0; + --dim; + } + + if (dim < 0) { + offset = -1; + return *this; + } + + ++indices[dim]; + offset += descriptor->strides[dim]; + return *this; + } + + reference operator*() { return descriptor->data[offset]; } + pointer operator->() { return &descriptor->data[offset]; } + + const std::vector &getIndices() { return indices; } + + bool operator==(const DynamicMemRefIterator &other) const { + return other.offset == offset && other.descriptor == descriptor; + } + + bool operator!=(const DynamicMemRefIterator &other) const { + return !(*this 
== other); + } + +private: + /// Offset in the buffer. This can be derived from the indices and the + /// descriptor. + int64_t offset = 0; + + /// Array of indices in the multi-dimensional memref. + std::vector indices = {}; + + /// Descriptor for the dynamic memref. + DynamicMemRefType *descriptor; }; //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/IR/OpBase.td b/mlir/include/mlir/IR/OpBase.td --- a/mlir/include/mlir/IR/OpBase.td +++ b/mlir/include/mlir/IR/OpBase.td @@ -2288,8 +2288,11 @@ class ElementType : StrFunc<"getElementTypeOrSelf($" # name # ")">; class AllMatchPred values> : - CPred<"::llvm::is_splat(::llvm::makeArrayRef({" - # !interleave(values, ", ") #"}))">; + CPred; class AllMatch values, string summary> : PredOpTrait>; diff --git a/mlir/include/mlir/Transforms/TopologicalSortUtils.h b/mlir/include/mlir/Transforms/TopologicalSortUtils.h --- a/mlir/include/mlir/Transforms/TopologicalSortUtils.h +++ b/mlir/include/mlir/Transforms/TopologicalSortUtils.h @@ -90,11 +90,23 @@ function_ref isOperandReady = nullptr); /// Given a block, sort its operations in topological order, excluding its -/// terminator if it has one. +/// terminator if it has one. This sort is stable. bool sortTopologically( Block *block, function_ref isOperandReady = nullptr); +/// Compute a topological ordering of the given ops. All ops must belong to the +/// specified block. +/// +/// This sort is not stable. +/// +/// Note: If the specified ops contain incomplete/interrupted SSA use-def +/// chains, the result may not actually be a topological sorting with respect to +/// the entire program. 
+bool computeTopologicalSorting( + Block *block, MutableArrayRef ops, + function_ref isOperandReady = nullptr); + } // end namespace mlir #endif // MLIR_TRANSFORMS_TOPOLOGICALSORTUTILS_H diff --git a/mlir/lib/Analysis/DataFlow/DenseAnalysis.cpp b/mlir/lib/Analysis/DataFlow/DenseAnalysis.cpp --- a/mlir/lib/Analysis/DataFlow/DenseAnalysis.cpp +++ b/mlir/lib/Analysis/DataFlow/DenseAnalysis.cpp @@ -49,8 +49,6 @@ // Get the dense lattice to update. AbstractDenseLattice *after = getLattice(op); - if (after->isAtFixpoint()) - return; // If this op implements region control-flow, then control-flow dictates its // transfer function. @@ -91,8 +89,6 @@ // Get the dense lattice to update. AbstractDenseLattice *after = getLattice(block); - if (after->isAtFixpoint()) - return; // The dense lattices of entry blocks are set by region control-flow or the // callgraph. diff --git a/mlir/lib/Analysis/DataFlow/SparseAnalysis.cpp b/mlir/lib/Analysis/DataFlow/SparseAnalysis.cpp --- a/mlir/lib/Analysis/DataFlow/SparseAnalysis.cpp +++ b/mlir/lib/Analysis/DataFlow/SparseAnalysis.cpp @@ -87,16 +87,10 @@ // Get the result lattices. SmallVector resultLattices; resultLattices.reserve(op->getNumResults()); - // Track whether all results have reached their fixpoint. - bool allAtFixpoint = true; for (Value result : op->getResults()) { AbstractSparseLattice *resultLattice = getLatticeElement(result); - allAtFixpoint &= resultLattice->isAtFixpoint(); resultLattices.push_back(resultLattice); } - // If all result lattices have reached a fixpoint, there is nothing to do. - if (allAtFixpoint) - return; // The results of a region branch operation are determined by control-flow. if (auto branch = dyn_cast(op)) { @@ -145,16 +139,10 @@ // Get the argument lattices. 
SmallVector argLattices; argLattices.reserve(block->getNumArguments()); - bool allAtFixpoint = true; for (BlockArgument argument : block->getArguments()) { AbstractSparseLattice *argLattice = getLatticeElement(argument); - allAtFixpoint &= argLattice->isAtFixpoint(); argLattices.push_back(argLattice); } - // If all argument lattices have reached their fixpoints, then there is - // nothing to do. - if (allAtFixpoint) - return; // The argument lattices of entry blocks are set by region control-flow or the // callgraph. diff --git a/mlir/lib/Bindings/Python/IRAttributes.cpp b/mlir/lib/Bindings/Python/IRAttributes.cpp --- a/mlir/lib/Bindings/Python/IRAttributes.cpp +++ b/mlir/lib/Bindings/Python/IRAttributes.cpp @@ -113,15 +113,9 @@ /// A python-wrapped dense array attribute with an element type and a derived /// implementation class. template -class PyDenseArrayAttribute - : public PyConcreteAttribute> { +class PyDenseArrayAttribute : public PyConcreteAttribute { public: - static constexpr typename PyConcreteAttribute< - PyDenseArrayAttribute>::IsAFunctionTy isaFunction = - DerivedT::isaFunction; - static constexpr const char *pyClassName = DerivedT::pyClassName; - using PyConcreteAttribute< - PyDenseArrayAttribute>::PyConcreteAttribute; + using PyConcreteAttribute::PyConcreteAttribute; /// Iterator over the integer elements of a dense array. class PyDenseArrayIterator { @@ -158,33 +152,29 @@ EltTy getItem(intptr_t i) { return DerivedT::getElement(*this, i); } /// Bind the attribute class. - static void bindDerived(typename PyConcreteAttribute< - PyDenseArrayAttribute>::ClassTy &c) { + static void bindDerived(typename PyConcreteAttribute::ClassTy &c) { // Bind the constructor. 
c.def_static( "get", [](const std::vector &values, DefaultingPyMlirContext ctx) { MlirAttribute attr = DerivedT::getAttribute(ctx->get(), values.size(), values.data()); - return PyDenseArrayAttribute(ctx->getRef(), attr); + return DerivedT(ctx->getRef(), attr); }, py::arg("values"), py::arg("context") = py::none(), "Gets a uniqued dense array attribute"); // Bind the array methods. - c.def("__getitem__", - [](PyDenseArrayAttribute &arr, intptr_t i) { - if (i >= mlirDenseArrayGetNumElements(arr)) - throw py::index_error("DenseArray index out of range"); - return arr.getItem(i); - }); - c.def("__len__", [](const PyDenseArrayAttribute &arr) { - return mlirDenseArrayGetNumElements(arr); + c.def("__getitem__", [](DerivedT &arr, intptr_t i) { + if (i >= mlirDenseArrayGetNumElements(arr)) + throw py::index_error("DenseArray index out of range"); + return arr.getItem(i); }); - c.def("__iter__", [](const PyDenseArrayAttribute &arr) { - return PyDenseArrayIterator(arr); + c.def("__len__", [](const DerivedT &arr) { + return mlirDenseArrayGetNumElements(arr); }); - c.def("__add__", [](PyDenseArrayAttribute &arr, - py::list extras) { + c.def("__iter__", + [](const DerivedT &arr) { return PyDenseArrayIterator(arr); }); + c.def("__add__", [](DerivedT &arr, py::list extras) { std::vector values; intptr_t numOldElements = mlirDenseArrayGetNumElements(arr); values.reserve(numOldElements + py::len(extras)); @@ -194,7 +184,7 @@ values.push_back(pyTryCast(attr)); MlirAttribute attr = DerivedT::getAttribute(arr.getContext()->get(), values.size(), values.data()); - return PyDenseArrayAttribute(arr.getContext(), attr); + return DerivedT(arr.getContext(), attr); }); } }; diff --git a/mlir/lib/Conversion/ComplexToLibm/ComplexToLibm.cpp b/mlir/lib/Conversion/ComplexToLibm/ComplexToLibm.cpp --- a/mlir/lib/Conversion/ComplexToLibm/ComplexToLibm.cpp +++ b/mlir/lib/Conversion/ComplexToLibm/ComplexToLibm.cpp @@ -131,7 +131,8 @@ ConversionTarget target(getContext()); target.addLegalDialect(); 
target.addIllegalOp(); + complex::CosOp, complex::SinOp, complex::ConjOp, + complex::LogOp, complex::AbsOp, complex::AngleOp>(); if (failed(applyPartialConversion(module, target, std::move(patterns)))) signalPassFailure(); } diff --git a/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp b/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp --- a/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp +++ b/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp @@ -513,11 +513,28 @@ Value real = b.create(elementType, adaptor.getComplex()); Value imag = b.create(elementType, adaptor.getComplex()); + + Value half = b.create(elementType, + b.getFloatAttr(elementType, 0.5)); Value one = b.create(elementType, b.getFloatAttr(elementType, 1)); + Value two = b.create(elementType, + b.getFloatAttr(elementType, 2)); + + // log1p(a+bi) = .5*log((a+1)^2+b^2) + i*atan2(b, a + 1) + // log((a+1)+bi) = .5*log(a*a + 2*a + 1 + b*b) + i*atan2(b, a+1) + // log((a+1)+bi) = .5*log1p(a*a + 2*a + b*b) + i*atan2(b, a+1) + Value sumSq = b.create(real, real); + sumSq = b.create(sumSq, b.create(real, two)); + sumSq = b.create(sumSq, b.create(imag, imag)); + Value logSumSq = b.create(elementType, sumSq); + Value resultReal = b.create(logSumSq, half); + Value realPlusOne = b.create(real, one); - Value newComplex = b.create(type, realPlusOne, imag); - rewriter.replaceOpWithNewOp(op, type, newComplex); + + Value resultImag = b.create(elementType, imag, realPlusOne); + rewriter.replaceOpWithNewOp(op, type, resultReal, + resultImag); return success(); } }; diff --git a/mlir/lib/Conversion/MathToLibm/MathToLibm.cpp b/mlir/lib/Conversion/MathToLibm/MathToLibm.cpp --- a/mlir/lib/Conversion/MathToLibm/MathToLibm.cpp +++ b/mlir/lib/Conversion/MathToLibm/MathToLibm.cpp @@ -138,8 +138,9 @@ return success(); } -void mlir::populateMathToLibmConversionPatterns(RewritePatternSet &patterns, - PatternBenefit benefit) { +void mlir::populateMathToLibmConversionPatterns( + 
RewritePatternSet &patterns, PatternBenefit benefit, + llvm::Optional log1pBenefit) { patterns.add, VecOpToScalarOp, VecOpToScalarOp, VecOpToScalarOp, VecOpToScalarOp, VecOpToScalarOp, @@ -168,6 +169,8 @@ "cos", benefit); patterns.add>(patterns.getContext(), "sinf", "sin", benefit); + patterns.add>( + patterns.getContext(), "log1pf", "log1p", log1pBenefit.value_or(benefit)); } namespace { diff --git a/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp b/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp --- a/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp +++ b/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp @@ -399,13 +399,13 @@ assert(areVarsUnique(*a) && "A's values aren't unique"); assert(areVarsUnique(*b) && "B's values aren't unique"); - assert(std::all_of(a->getMaybeValues().begin() + offset, - a->getMaybeValues().end(), - [](Optional var) { return var.has_value(); })); + assert( + llvm::all_of(llvm::drop_begin(a->getMaybeValues(), offset), + [](const Optional &var) { return var.has_value(); })); - assert(std::all_of(b->getMaybeValues().begin() + offset, - b->getMaybeValues().end(), - [](Optional var) { return var.has_value(); })); + assert( + llvm::all_of(llvm::drop_begin(b->getMaybeValues(), offset), + [](const Optional &var) { return var.has_value(); })); SmallVector aDimValues; a->getValues(offset, a->getNumDimVars(), &aDimValues); diff --git a/mlir/lib/Dialect/Bufferization/Transforms/AllocTensorElimination.cpp b/mlir/lib/Dialect/Bufferization/Transforms/AllocTensorElimination.cpp --- a/mlir/lib/Dialect/Bufferization/Transforms/AllocTensorElimination.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/AllocTensorElimination.cpp @@ -140,6 +140,15 @@ return WalkResult::skip(); Value allocTensor = maybeAllocTensor.front(); + // Replace only if the types match. 
+ // TODO: This could be extended to support IR such as: + // %0 = bufferization.alloc_tensor : tensor<128xf32> + // %1 = "some_op"(%0) : (tensor<128xf32>) -> (tensor<128xf32>) + // %2 = tensor.expand_shape %1 ... + // %3 = tensor.insert_slice %2 into ... + if (allocTensor.getType() != operand.get().getType()) + return WalkResult::skip(); + // Find a suitable insertion point. Operation *insertionPoint = findValidInsertionPoint(allocTensor.getDefiningOp(), neededValues); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp @@ -450,6 +450,31 @@ applyPermutationToVector(iteratorTypes, permutation); } + // Handle distribution. Create a vector of the same size of loops that are to + // be tiled. + SmallVector procInfo; + if (options.distribution) { + procInfo.resize( + iteratorTypes.size(), + linalg::ProcInfo{nullptr, nullptr, linalg::DistributionMethod::None}); + // Collect loop ranges of tiled loopss, loops that are parallel. + SmallVector parallelLoopRanges; + for (auto iteratorType : llvm::enumerate(iteratorTypes)) { + if (!isParallelIterator(iteratorType.value())) + break; + parallelLoopRanges.push_back(loopRanges[iteratorType.index()]); + } + auto returnedProcInfo = + options.distribution->procInfo(b, op.getLoc(), parallelLoopRanges); + unsigned procIdIdx = 0; + // Update the distribution information for the loops. + for (auto iteratorType : llvm::enumerate(iteratorTypes)) { + if (!isParallelIterator(iteratorType.value())) + break; + procInfo[iteratorType.index()] = returnedProcInfo[procIdIdx++]; + } + } + // 2. Create the tiled loops. 
LinalgOp res = op; SmallVector ivs, tensorResults; @@ -489,8 +514,7 @@ return scf::ValueVector(tensorResults.begin(), tensorResults.end()); }; GenerateLoopNest::doit(b, op.getLoc(), loopRanges, op, iteratorTypes, - tiledLoopBodyBuilder, options.distribution, - options.distributionTypes); + tiledLoopBodyBuilder, procInfo); // 3. Transform IndexOp results w.r.t. the tiling. transformIndexOps(b, res, ivs, loopIndexToRangeIndex); diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -518,25 +518,11 @@ function_ref bodyBuilderFn, - Optional distributionOptions, - ArrayRef distributionTypes) { + ArrayRef procInfo) { + assert((procInfo.empty() || (procInfo.size() == loopRanges.size())) && + "expected as many entries for proc info as number of loops, even if " + "they are null entries"); SmallVector iterArgInitValues = linalgOp.getOutputTensorOperands(); - // Create procInfo so it dominates loops, if appropriate. - SmallVector procInfo; - SmallVector distributionMethod; - if (distributionOptions) { - // Collect loop ranges for parallel dimensions. - SmallVector parallelLoopRanges; - for (const auto &iteratorType : enumerate(iteratorTypes)) - if (isParallelIterator(iteratorType.value())) - parallelLoopRanges.push_back(loopRanges[iteratorType.index()]); - - // Get their distribution schemes. 
- distributionMethod = distributionOptions->distributionMethod; - if (distributionMethod.size() < parallelLoopRanges.size()) - parallelLoopRanges.resize(distributionMethod.size()); - procInfo = distributionOptions->procInfo(b, loc, parallelLoopRanges); - } SmallVector lbs, ubs, steps; unpackRanges(b, loc, loopRanges, lbs, ubs, steps); @@ -554,20 +540,17 @@ return bodyBuilderFn(b, loc, ivs, operandValuesToUse); }); - if (!distributionOptions || loopNest.loops.empty()) + if (loopNest.loops.empty() || procInfo.empty()) return; // Filter out scf.for loops that were created out of parallel dimensions. - SmallVector loops; - for (const auto &iteratorType : enumerate(iteratorTypes)) - if (isParallelIterator(iteratorType.value())) - loops.push_back(loopNest.loops[iteratorType.index()]); - - // Distribute - only supports cyclic distribution for now. - for (auto it : llvm::zip(loops, procInfo, distributionMethod)) - if (std::get<2>(it) == DistributionMethod::Cyclic) - mapLoopToProcessorIds(std::get<0>(it), std::get<1>(it).procId, - std::get<1>(it).nprocs); + for (auto loop : llvm::enumerate(loopNest.loops)) { + if (procInfo[loop.index()].distributionMethod == + DistributionMethod::Cyclic) { + mapLoopToProcessorIds(loop.value(), procInfo[loop.index()].procId, + procInfo[loop.index()].nprocs); + } + } } /// Specialization to build affine "for" nest. 
@@ -578,7 +561,7 @@ function_ref bodyBuilderFn, - Optional, ArrayRef) { + ArrayRef /*procInfo*/) { SmallVector iterArgInitValues = linalgOp.getOutputTensorOperands(); assert(iterArgInitValues.empty() && "unexpected AffineForOp init values"); SmallVector lbs, ubs, steps; @@ -625,12 +608,13 @@ static void generateParallelLoopNest( OpBuilder &b, Location loc, ValueRange lbs, ValueRange ubs, ValueRange steps, ArrayRef iteratorTypes, + ArrayRef procInfo, function_ref bodyBuilderFn, - SmallVectorImpl &ivStorage, - ArrayRef distributionMethod = {}) { + SmallVectorImpl &ivStorage) { assert(lbs.size() == ubs.size()); assert(lbs.size() == steps.size()); assert(lbs.size() == iteratorTypes.size()); + assert(procInfo.empty() || (lbs.size() == procInfo.size())); // If there are no (more) loops to be generated, generate the body and be // done with it. @@ -639,55 +623,56 @@ return; } - // Find the outermost parallel loops and drop their types from the list. - unsigned nLoops = iteratorTypes.size(); - unsigned nOuterPar = - nLoops - iteratorTypes.drop_while(isParallelIterator).size(); - // If there are no outer parallel loops, generate one sequential loop and - // recurse. Note that we wouldn't have dropped anything from `iteratorTypes` - // in this case. - if (nOuterPar == 0) { + // recurse. + if (!isParallelIterator(iteratorTypes.front())) { LoopNest singleLoop = buildLoopNest( b, loc, lbs.take_front(), ubs.take_front(), steps.take_front(), [&](OpBuilder &b, Location loc, ValueRange ivs) { ivStorage.append(ivs.begin(), ivs.end()); - generateParallelLoopNest(b, loc, lbs.drop_front(), ubs.drop_front(), - steps.drop_front(), - iteratorTypes.drop_front(), bodyBuilderFn, - ivStorage, distributionMethod); + generateParallelLoopNest( + b, loc, lbs.drop_front(), ubs.drop_front(), steps.drop_front(), + iteratorTypes.drop_front(), + procInfo.empty() ? 
procInfo : procInfo.drop_front(), + bodyBuilderFn, ivStorage); }); return; } - if (distributionMethod.empty()) { + + unsigned nLoops = iteratorTypes.size(); + unsigned numProcessed = 0; + DistributionMethod distributionMethod = DistributionMethod::None; + if (procInfo.empty()) { + numProcessed = nLoops - iteratorTypes.drop_while(isParallelIterator).size(); + } else { + distributionMethod = procInfo.front().distributionMethod; + numProcessed = + nLoops - procInfo + .drop_while([&](linalg::ProcInfo p) { + return p.distributionMethod == distributionMethod; + }) + .size(); + } + + auto remainderProcInfo = + procInfo.empty() ? procInfo : procInfo.drop_front(numProcessed); + switch (distributionMethod) { + case DistributionMethod::None: { // Generate a single parallel loop-nest operation for all outermost // parallel loops and recurse. b.create( - loc, lbs.take_front(nOuterPar), ubs.take_front(nOuterPar), - steps.take_front(nOuterPar), + loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed), + steps.take_front(numProcessed), [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange localIvs) { ivStorage.append(localIvs.begin(), localIvs.end()); generateParallelLoopNest( - nestedBuilder, nestedLoc, lbs.drop_front(nOuterPar), - ubs.drop_front(nOuterPar), steps.drop_front(nOuterPar), - iteratorTypes.drop_front(nOuterPar), bodyBuilderFn, ivStorage, - (distributionMethod.size() < nOuterPar) - ? ArrayRef() - : distributionMethod.drop_front(nOuterPar)); + nestedBuilder, nestedLoc, lbs.drop_front(numProcessed), + ubs.drop_front(numProcessed), steps.drop_front(numProcessed), + iteratorTypes.drop_front(numProcessed), remainderProcInfo, + bodyBuilderFn, ivStorage); }); return; } - - // Process all consecutive similarly distributed loops simultaneously. 
- DistributionMethod methodToUse = distributionMethod[0]; - unsigned numProcessed = 1; - for (unsigned i = 1; i < nOuterPar && i < distributionMethod.size(); ++i) { - if (distributionMethod[i] != methodToUse) - break; - numProcessed++; - } - - switch (methodToUse) { case DistributionMethod::Cyclic: { // Generate a single parallel loop-nest operation for all outermost // parallel loops and recurse. @@ -699,10 +684,8 @@ generateParallelLoopNest( nestedBuilder, nestedLoc, lbs.drop_front(numProcessed), ubs.drop_front(numProcessed), steps.drop_front(numProcessed), - iteratorTypes.drop_front(numProcessed), bodyBuilderFn, ivStorage, - (distributionMethod.size() < numProcessed) - ? ArrayRef() - : distributionMethod.drop_front(numProcessed)); + iteratorTypes.drop_front(numProcessed), remainderProcInfo, + bodyBuilderFn, ivStorage); }); return; } @@ -714,11 +697,11 @@ cond = ab._and(cond, ab.slt(lbs[i], ubs[i])); ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed)); b.create(loc, cond, [&](OpBuilder &b, Location loc) { - generateParallelLoopNest( - b, loc, lbs.drop_front(numProcessed), ubs.drop_front(numProcessed), - steps.drop_front(numProcessed), - iteratorTypes.drop_front(numProcessed), bodyBuilderFn, ivStorage, - distributionMethod.drop_front(numProcessed)); + generateParallelLoopNest(b, loc, lbs.drop_front(numProcessed), + ubs.drop_front(numProcessed), + steps.drop_front(numProcessed), + iteratorTypes.drop_front(numProcessed), + remainderProcInfo, bodyBuilderFn, ivStorage); b.create(loc, ValueRange{}); }); return; @@ -730,7 +713,7 @@ generateParallelLoopNest( b, loc, lbs.drop_front(numProcessed), ubs.drop_front(numProcessed), steps.drop_front(numProcessed), iteratorTypes.drop_front(numProcessed), - bodyBuilderFn, ivStorage, distributionMethod.drop_front(numProcessed)); + remainderProcInfo, bodyBuilderFn, ivStorage); return; } } @@ -743,13 +726,14 @@ function_ref bodyBuilderFn, - Optional distributionOptions, - ArrayRef distributionTypes) { + ArrayRef 
procInfo) { SmallVector iterArgInitValues = linalgOp.getOutputTensorOperands(); assert(iterArgInitValues.empty() && "unexpected ParallelOp init values"); // This function may be passed more iterator types than ranges. assert(iteratorTypes.size() >= loopRanges.size() && "expected iterator type for all ranges"); + assert((procInfo.empty() || (procInfo.size() == loopRanges.size())) && + "expected proc information for all loops when present"); iteratorTypes = iteratorTypes.take_front(loopRanges.size()); SmallVector lbsStorage, ubsStorage, stepsStorage, ivs; unsigned numLoops = iteratorTypes.size(); @@ -762,42 +746,22 @@ unpackRanges(b, loc, loopRanges, lbsStorage, ubsStorage, stepsStorage); // Modify the lb, ub, and step based on the distribution options. - SmallVector distributionMethod; - if (distributionOptions) { - auto &options = *distributionOptions; - distributionMethod.assign(distributionOptions->distributionMethod.begin(), - distributionOptions->distributionMethod.end()); - SmallVector parallelLoopRanges; - for (const auto &iteratorType : enumerate(iteratorTypes)) { - if (isParallelIterator(iteratorType.value())) - parallelLoopRanges.push_back(loopRanges[iteratorType.index()]); - } - if (distributionMethod.size() < parallelLoopRanges.size()) - parallelLoopRanges.resize(distributionMethod.size()); - SmallVector procInfo = - options.procInfo(b, loc, parallelLoopRanges); - unsigned index = 0; - for (const auto &iteratorType : enumerate(iteratorTypes)) { - if (index >= procInfo.size()) - break; - if (isParallelIterator(iteratorType.value())) { - unsigned i = iteratorType.index(); - updateBoundsForCyclicDistribution(b, loc, procInfo[index].procId, - procInfo[index].nprocs, lbsStorage[i], - ubsStorage[i], stepsStorage[i]); - index++; - } + for (auto it : llvm::enumerate(procInfo)) { + if (it.value().distributionMethod != linalg::DistributionMethod::None) { + updateBoundsForCyclicDistribution( + b, loc, it.value().procId, it.value().nprocs, lbsStorage[it.index()], + 
ubsStorage[it.index()], stepsStorage[it.index()]); } } ValueRange lbs(lbsStorage), ubs(ubsStorage), steps(stepsStorage); generateParallelLoopNest( - b, loc, lbs, ubs, steps, iteratorTypes, + b, loc, lbs, ubs, steps, iteratorTypes, procInfo, [&](OpBuilder &b, Location loc, ValueRange ivs) { SmallVector operandValuesToUse = linalgOp.getInputAndOutputOperands(); bodyBuilderFn(b, loc, ivs, operandValuesToUse); }, - ivs, distributionMethod); + ivs); assert(ivs.size() == iteratorTypes.size() && "did not generate enough loops"); } diff --git a/mlir/lib/Dialect/Math/IR/MathOps.cpp b/mlir/lib/Dialect/Math/IR/MathOps.cpp --- a/mlir/lib/Dialect/Math/IR/MathOps.cpp +++ b/mlir/lib/Dialect/Math/IR/MathOps.cpp @@ -134,6 +134,56 @@ }); } +//===----------------------------------------------------------------------===// +// IPowIOp folder +//===----------------------------------------------------------------------===// + +OpFoldResult math::IPowIOp::fold(ArrayRef operands) { + return constFoldBinaryOpConditional( + operands, [](const APInt &base, const APInt &power) -> Optional { + unsigned width = base.getBitWidth(); + auto zeroValue = APInt::getZero(width); + APInt oneValue{width, 1ULL, /*isSigned=*/true}; + APInt minusOneValue{width, -1ULL, /*isSigned=*/true}; + + if (power.isZero()) + return oneValue; + + if (power.isNegative()) { + // Leave 0 raised to negative power not folded. + if (base.isZero()) + return {}; + if (base.eq(oneValue)) + return oneValue; + // If abs(base) > 1, then the result is zero. + if (base.ne(minusOneValue)) + return zeroValue; + // base == -1: + // -1: power is odd + // 1: power is even + if (power[0] == 1) + return minusOneValue; + + return oneValue; + } + + // power is positive. 
+ APInt result = oneValue; + APInt curBase = base; + APInt curPower = power; + while (true) { + if (curPower[0] == 1) + result *= curBase; + curPower.lshrInPlace(1); + if (curPower.isZero()) + return result; + curBase *= curBase; + } + }); + + return Attribute(); +} + //===----------------------------------------------------------------------===// // LogOp folder //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Math/Transforms/AlgebraicSimplification.cpp b/mlir/lib/Dialect/Math/Transforms/AlgebraicSimplification.cpp --- a/mlir/lib/Dialect/Math/Transforms/AlgebraicSimplification.cpp +++ b/mlir/lib/Dialect/Math/Transforms/AlgebraicSimplification.cpp @@ -112,9 +112,111 @@ return failure(); } +//----------------------------------------------------------------------------// +// FPowIOp/IPowIOp strength reduction. +//----------------------------------------------------------------------------// + +namespace { +template +struct PowIStrengthReduction : public OpRewritePattern { + + unsigned exponentThreshold; + +public: + PowIStrengthReduction(MLIRContext *context, unsigned exponentThreshold = 3, + PatternBenefit benefit = 1, + ArrayRef generatedNames = {}) + : OpRewritePattern(context, benefit, generatedNames), + exponentThreshold(exponentThreshold) {} + + LogicalResult matchAndRewrite(PowIOpTy op, + PatternRewriter &rewriter) const final; +}; +} // namespace + +template +LogicalResult +PowIStrengthReduction::matchAndRewrite( + PowIOpTy op, PatternRewriter &rewriter) const { + Location loc = op.getLoc(); + Value base = op.getLhs(); + + IntegerAttr scalarExponent; + DenseIntElementsAttr vectorExponent; + + bool isScalar = matchPattern(op.getRhs(), m_Constant(&scalarExponent)); + bool isVector = matchPattern(op.getRhs(), m_Constant(&vectorExponent)); + + // Simplify cases with known exponent value. 
+ int64_t exponentValue = 0; + if (isScalar) + exponentValue = scalarExponent.getInt(); + else if (isVector && vectorExponent.isSplat()) + exponentValue = vectorExponent.getSplatValue().getInt(); + else + return failure(); + + // Maybe broadcasts scalar value into vector type compatible with `op`. + auto bcast = [&loc, &op, &rewriter](Value value) -> Value { + if (auto vec = op.getType().template dyn_cast()) + return rewriter.create(loc, vec, value); + return value; + }; + + Value one; + Type opType = getElementTypeOrSelf(op.getType()); + if constexpr (std::is_same_v) + one = rewriter.create( + loc, rewriter.getFloatAttr(opType, 1.0)); + else + one = rewriter.create( + loc, rewriter.getIntegerAttr(opType, 1)); + + // Replace `[fi]powi(x, 0)` with `1`. + if (exponentValue == 0) { + rewriter.replaceOp(op, bcast(one)); + return success(); + } + + bool exponentIsNegative = false; + if (exponentValue < 0) { + exponentIsNegative = true; + exponentValue *= -1; + } + + // Bail out if `abs(exponent)` exceeds the threshold. + if (exponentValue > exponentThreshold) + return failure(); + + // Inverse the base for negative exponent, i.e. for + // `[fi]powi(x, negative_exponent)` set `x` to `1 / x`. + if (exponentIsNegative) + base = rewriter.create(loc, bcast(one), base); + + Value result = base; + // Transform to naive sequence of multiplications: + // * For positive exponent case replace: + // `[fi]powi(x, positive_exponent)` + // with: + // x * x * x * ... + // * For negative exponent case replace: + // `[fi]powi(x, negative_exponent)` + // with: + // (1 / x) * (1 / x) * (1 / x) * ... 
+ for (unsigned i = 1; i < exponentValue; ++i) + result = rewriter.create(loc, result, base); + + rewriter.replaceOp(op, result); + return success(); +} + //----------------------------------------------------------------------------// void mlir::populateMathAlgebraicSimplificationPatterns( RewritePatternSet &patterns) { - patterns.add(patterns.getContext()); + patterns + .add, + PowIStrengthReduction>( + patterns.getContext()); } diff --git a/mlir/lib/Dialect/NVGPU/Transforms/MmaSyncTF32Transform.cpp b/mlir/lib/Dialect/NVGPU/Transforms/MmaSyncTF32Transform.cpp --- a/mlir/lib/Dialect/NVGPU/Transforms/MmaSyncTF32Transform.cpp +++ b/mlir/lib/Dialect/NVGPU/Transforms/MmaSyncTF32Transform.cpp @@ -42,7 +42,8 @@ PatternRewriter &rewrite) const override { Location location = op->getLoc(); - if (op->hasAttr(op.getTf32EnabledAttrName())) + if (op->hasAttr(op.getTf32EnabledAttrName()) || + !op.getMatrixA().getType().cast().getElementType().isF32()) return failure(); if (precision == MmaSyncF32Lowering::Unkown) diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp --- a/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp +++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp @@ -348,6 +348,39 @@ return CooperativeMatrixNVType::get(elementTy, scope, dims[0], dims[1]); } +// joint-matrix-type ::= `!spv.jointmatrix` `<`rows `x` columns `x` element-type +// `,` layout `,` scope`>` +static Type parseJointMatrixType(SPIRVDialect const &dialect, + DialectAsmParser &parser) { + if (parser.parseLess()) + return Type(); + + SmallVector dims; + SMLoc countLoc = parser.getCurrentLocation(); + if (parser.parseDimensionList(dims, /*allowDynamic=*/false)) + return Type(); + + if (dims.size() != 2) { + parser.emitError(countLoc, "expected rows and columns size"); + return Type(); + } + + auto elementTy = parseAndVerifyType(dialect, parser); + if (!elementTy) + return Type(); + MatrixLayout matrixLayout; + if (parser.parseComma() || + 
parseEnumKeywordAttr(matrixLayout, parser, "matrixLayout ")) + return Type(); + Scope scope; + if (parser.parseComma() || parseEnumKeywordAttr(scope, parser, "scope ")) + return Type(); + if (parser.parseGreater()) + return Type(); + return JointMatrixINTELType::get(elementTy, scope, dims[0], dims[1], + matrixLayout); +} + // TODO: Reorder methods to be utilities first and parse*Type // methods in alphabetical order // @@ -753,6 +786,8 @@ return parseArrayType(*this, parser); if (keyword == "coopmatrix") return parseCooperativeMatrixType(*this, parser); + if (keyword == "jointmatrix") + return parseJointMatrixType(*this, parser); if (keyword == "image") return parseImageType(*this, parser); if (keyword == "ptr") @@ -859,6 +894,13 @@ os << ">"; } +static void print(JointMatrixINTELType type, DialectAsmPrinter &os) { + os << "jointmatrix<" << type.getRows() << "x" << type.getColumns() << "x"; + os << type.getElementType() << ", " + << stringifyMatrixLayout(type.getMatrixLayout()); + os << ", " << stringifyScope(type.getScope()) << ">"; +} + static void print(MatrixType type, DialectAsmPrinter &os) { os << "matrix<" << type.getNumColumns() << " x " << type.getColumnType(); os << ">"; @@ -866,9 +908,9 @@ void SPIRVDialect::printType(Type type, DialectAsmPrinter &os) const { TypeSwitch(type) - .Case( - [&](auto type) { print(type, os); }) + .Case([&](auto type) { print(type, os); }) .Default([](Type) { llvm_unreachable("unhandled SPIR-V type"); }); } diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp --- a/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp +++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp @@ -436,6 +436,13 @@ resultType.cast().getElementType(); } + if (auto jointMatrixType = + operandType.dyn_cast()) { + operandType = jointMatrixType.getElementType(); + resultType = + resultType.cast().getElementType(); + } + auto operandTypeBitWidth = operandType.getIntOrFloatBitWidth(); auto resultTypeBitWidth = 
resultType.getIntOrFloatBitWidth(); auto isSameBitWidth = operandTypeBitWidth == resultTypeBitWidth; @@ -1637,6 +1644,17 @@ return success(); } + if (auto jointType = cType.dyn_cast()) { + if (constituents.size() != 1) + return emitOpError("has incorrect number of operands: expected ") + << "1, but provided " << constituents.size(); + if (jointType.getElementType() != constituents.front().getType()) + return emitOpError("operand type mismatch: expected operand type ") + << jointType.getElementType() << ", but provided " + << constituents.front().getType(); + return success(); + } + if (constituents.size() == cType.getNumElements()) { for (auto index : llvm::seq(0, constituents.size())) { if (constituents[index].getType() != cType.getElementType(index)) { @@ -3893,6 +3911,70 @@ return verifyCoopMatrixMulAdd(*this); } +static LogicalResult +verifyPointerAndJointMatrixType(Operation *op, Type pointer, Type jointMatrix) { + Type pointeeType = pointer.cast().getPointeeType(); + if (!pointeeType.isa() && !pointeeType.isa()) + return op->emitError( + "Pointer must point to a scalar or vector type but provided ") + << pointeeType; + spirv::StorageClass storage = + pointer.cast().getStorageClass(); + if (storage != spirv::StorageClass::Workgroup && + storage != spirv::StorageClass::CrossWorkgroup) + return op->emitError("Pointer storage class must be Workgroup or " + "CrossWorkgroup but provided ") + << stringifyStorageClass(storage); + return success(); +} + +//===----------------------------------------------------------------------===// +// spv.JointMatrixLoadINTEL +//===----------------------------------------------------------------------===// + +LogicalResult spirv::JointMatrixLoadINTELOp::verify() { + return verifyPointerAndJointMatrixType(*this, pointer().getType(), + result().getType()); +} + +//===----------------------------------------------------------------------===// +// spv.JointMatrixStoreINTEL 
+//===----------------------------------------------------------------------===// + +LogicalResult spirv::JointMatrixStoreINTELOp::verify() { + return verifyPointerAndJointMatrixType(*this, pointer().getType(), + object().getType()); +} + +//===----------------------------------------------------------------------===// +// spv.JointMatrixMadINTEL +//===----------------------------------------------------------------------===// + +static LogicalResult verifyJointMatrixMad(spirv::JointMatrixMadINTELOp op) { + if (op.c().getType() != op.result().getType()) + return op.emitOpError("result and third operand must have the same type"); + auto typeA = op.a().getType().cast(); + auto typeB = op.b().getType().cast(); + auto typeC = op.c().getType().cast(); + auto typeR = op.result().getType().cast(); + if (typeA.getRows() != typeR.getRows() || + typeA.getColumns() != typeB.getRows() || + typeB.getColumns() != typeR.getColumns()) + return op.emitOpError("matrix size must match"); + if (typeR.getScope() != typeA.getScope() || + typeR.getScope() != typeB.getScope() || + typeR.getScope() != typeC.getScope()) + return op.emitOpError("matrix scope must match"); + if (typeA.getElementType() != typeB.getElementType() || + typeR.getElementType() != typeC.getElementType()) + return op.emitOpError("matrix element type must match"); + return success(); +} + +LogicalResult spirv::JointMatrixMadINTELOp::verify() { + return verifyJointMatrixMad(*this); +} + //===----------------------------------------------------------------------===// // spv.MatrixTimesScalar //===----------------------------------------------------------------------===// @@ -4150,6 +4232,8 @@ if (cType.isa()) return emitError("unsupported composite type ") << cType; + if (cType.isa()) + return emitError("unsupported composite type ") << cType; if (constituents.size() != cType.getNumElements()) return emitError("has incorrect number of operands: expected ") << cType.getNumElements() << ", but provided " diff --git 
a/mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp --- a/mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp +++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp @@ -89,9 +89,9 @@ bool CompositeType::classof(Type type) { if (auto vectorType = type.dyn_cast()) return isValid(vectorType); - return type - .isa(); + return type.isa(); } bool CompositeType::isValid(VectorType type) { @@ -110,7 +110,8 @@ Type CompositeType::getElementType(unsigned index) const { return TypeSwitch(*this) - .Case( + .Case( [](auto type) { return type.getElementType(); }) .Case([](MatrixType type) { return type.getColumnType(); }) .Case( @@ -132,6 +133,10 @@ llvm_unreachable( "invalid to query number of elements of spirv::CooperativeMatrix type"); } + if (isa()) { + llvm_unreachable( + "invalid to query number of elements of spirv::JointMatrix type"); + } if (isa()) { llvm_unreachable( "invalid to query number of elements of spirv::RuntimeArray type"); @@ -140,15 +145,16 @@ } bool CompositeType::hasCompileTimeKnownNumElements() const { - return !isa(); + return !isa(); } void CompositeType::getExtensions( SPIRVType::ExtensionArrayRefVector &extensions, Optional storage) { TypeSwitch(*this) - .Case( + .Case( [&](auto type) { type.getExtensions(extensions, storage); }) .Case([&](VectorType type) { return type.getElementType().cast().getExtensions( @@ -161,8 +167,8 @@ SPIRVType::CapabilityArrayRefVector &capabilities, Optional storage) { TypeSwitch(*this) - .Case( + .Case( [&](auto type) { type.getCapabilities(capabilities, storage); }) .Case([&](VectorType type) { auto vecSize = getNumElements(); @@ -255,6 +261,74 @@ capabilities.push_back(ref); } +//===----------------------------------------------------------------------===// +// JointMatrixType +//===----------------------------------------------------------------------===// + +struct spirv::detail::JointMatrixTypeStorage : public TypeStorage { + using KeyTy = std::tuple; + + static JointMatrixTypeStorage 
*construct(TypeStorageAllocator &allocator, + const KeyTy &key) { + return new (allocator.allocate()) + JointMatrixTypeStorage(key); + } + + bool operator==(const KeyTy &key) const { + return key == KeyTy(elementType, rows, columns, matrixLayout, scope); + } + + JointMatrixTypeStorage(const KeyTy &key) + : elementType(std::get<0>(key)), rows(std::get<1>(key)), + columns(std::get<2>(key)), scope(std::get<4>(key)), + matrixLayout(std::get<3>(key)) {} + + Type elementType; + unsigned rows; + unsigned columns; + Scope scope; + MatrixLayout matrixLayout; +}; + +JointMatrixINTELType JointMatrixINTELType::get(Type elementType, Scope scope, + unsigned rows, unsigned columns, + MatrixLayout matrixLayout) { + return Base::get(elementType.getContext(), elementType, rows, columns, + matrixLayout, scope); +} + +Type JointMatrixINTELType::getElementType() const { + return getImpl()->elementType; +} + +Scope JointMatrixINTELType::getScope() const { return getImpl()->scope; } + +unsigned JointMatrixINTELType::getRows() const { return getImpl()->rows; } + +unsigned JointMatrixINTELType::getColumns() const { return getImpl()->columns; } + +MatrixLayout JointMatrixINTELType::getMatrixLayout() const { + return getImpl()->matrixLayout; +} + +void JointMatrixINTELType::getExtensions( + SPIRVType::ExtensionArrayRefVector &extensions, + Optional storage) { + getElementType().cast().getExtensions(extensions, storage); + static const Extension exts[] = {Extension::SPV_INTEL_joint_matrix}; + ArrayRef ref(exts, llvm::array_lengthof(exts)); + extensions.push_back(ref); +} + +void JointMatrixINTELType::getCapabilities( + SPIRVType::CapabilityArrayRefVector &capabilities, + Optional storage) { + getElementType().cast().getCapabilities(capabilities, storage); + static const Capability caps[] = {Capability::JointMatrixINTEL}; + ArrayRef ref(caps, llvm::array_lengthof(caps)); + capabilities.push_back(ref); +} + //===----------------------------------------------------------------------===// // 
ImageType //===----------------------------------------------------------------------===// @@ -1172,6 +1246,7 @@ //===----------------------------------------------------------------------===// void SPIRVDialect::registerTypes() { - addTypes(); + addTypes(); } diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp @@ -52,17 +52,16 @@ /// the "_emit_c_interface" on the function declaration when requested, /// so that LLVM lowering generates a wrapper function that takes care /// of ABI complications with passing in and returning MemRefs to C functions. -static FlatSymbolRefAttr getFunc(Operation *op, StringRef name, +static FlatSymbolRefAttr getFunc(ModuleOp module, StringRef name, TypeRange resultType, ValueRange operands, EmitCInterface emitCInterface) { - MLIRContext *context = op->getContext(); - auto module = op->getParentOfType(); + MLIRContext *context = module.getContext(); auto result = SymbolRefAttr::get(context, name); auto func = module.lookupSymbol(result.getAttr()); if (!func) { OpBuilder moduleBuilder(module.getBodyRegion()); func = moduleBuilder.create( - op->getLoc(), name, + module.getLoc(), name, FunctionType::get(context, operands.getTypes(), resultType)); func.setPrivate(); if (static_cast(emitCInterface)) @@ -72,13 +71,15 @@ return result; } -/// Creates a `CallOp` to the function reference returned by `getFunc()`. -static func::CallOp createFuncCall(OpBuilder &builder, Operation *op, +/// Creates a `CallOp` to the function reference returned by `getFunc()` in +/// the builder's module. 
+static func::CallOp createFuncCall(OpBuilder &builder, Location loc, StringRef name, TypeRange resultType, ValueRange operands, EmitCInterface emitCInterface) { - auto fn = getFunc(op, name, resultType, operands, emitCInterface); - return builder.create(op->getLoc(), resultType, fn, operands); + auto module = builder.getBlock()->getParentOp()->getParentOfType(); + auto fn = getFunc(module, name, resultType, operands, emitCInterface); + return builder.create(loc, resultType, fn, operands); } /// Replaces the `op` with a `CallOp` to the function reference returned @@ -87,13 +88,14 @@ StringRef name, TypeRange resultType, ValueRange operands, EmitCInterface emitCInterface) { - auto fn = getFunc(op, name, resultType, operands, emitCInterface); + auto fn = getFunc(op->getParentOfType(), name, resultType, operands, + emitCInterface); return rewriter.replaceOpWithNewOp(op, resultType, fn, operands); } /// Generates dimension size call. -static Value genDimSizeCall(OpBuilder &builder, Operation *op, +static Value genDimSizeCall(OpBuilder &builder, Location loc, SparseTensorEncodingAttr &enc, Value src, int64_t idx) { // Permute the index according to an optional dimension ordering. @@ -101,22 +103,42 @@ idx = p.getPermutedPosition(idx); // Generate the call. StringRef name = "sparseDimSize"; - SmallVector params{src, constantIndex(builder, op->getLoc(), idx)}; + SmallVector params{src, constantIndex(builder, loc, idx)}; Type iTp = builder.getIndexType(); - return createFuncCall(builder, op, name, iTp, params, EmitCInterface::Off) + return createFuncCall(builder, loc, name, iTp, params, EmitCInterface::Off) .getResult(0); } /// Generates a call into the "swiss army knife" method of the sparse runtime /// support library for materializing sparse tensors into the computation. 
-static Value genNewCall(OpBuilder &builder, Operation *op, +static Value genNewCall(OpBuilder &builder, Location loc, ArrayRef params) { StringRef name = "newSparseTensor"; Type pTp = getOpaquePointerType(builder); - return createFuncCall(builder, op, name, pTp, params, EmitCInterface::On) + return createFuncCall(builder, loc, name, pTp, params, EmitCInterface::On) .getResult(0); } +/// Compute the size from type (for static sizes) or from an already-converted +/// opaque pointer source (for dynamic sizes) at the given dimension. +static Value sizeFromPtrAtDim(OpBuilder &builder, Location loc, + SparseTensorEncodingAttr &enc, ShapedType stp, + Value src, unsigned dim) { + auto shape = stp.getShape(); + if (shape[dim] == ShapedType::kDynamicSize) + return genDimSizeCall(builder, loc, enc, src, dim); + return constantIndex(builder, loc, shape[dim]); +} + +/// Populates given sizes array from type (for static sizes) and from +/// an already-converted opaque pointer source (for dynamic sizes). +static void sizesFromPtr(OpBuilder &builder, SmallVector &sizes, + Location loc, SparseTensorEncodingAttr &enc, + ShapedType stp, Value src) { + for (unsigned i = 0, rank = stp.getRank(); i < rank; i++) + sizes.push_back(sizeFromPtrAtDim(builder, loc, enc, stp, src, i)); +} + /// Populates given sizes array from type. static void sizesFromType(OpBuilder &builder, SmallVector &sizes, Location loc, ShapedType stp) { @@ -135,18 +157,41 @@ sizes.push_back(linalg::createOrFoldDimOp(builder, loc, src, i)); } -/// Populates given sizes array from type (for static sizes) and from -/// an already converted into opague pointer source (for dynamic sizes). 
-static void sizesFromPtr(OpBuilder &builder, SmallVector &sizes, - Operation *op, SparseTensorEncodingAttr &enc, - ShapedType stp, Value src) { - Location loc = op->getLoc(); - auto shape = stp.getShape(); - for (unsigned i = 0, rank = stp.getRank(); i < rank; i++) - if (shape[i] == ShapedType::kDynamicSize) - sizes.push_back(genDimSizeCall(builder, op, enc, src, i)); - else - sizes.push_back(constantIndex(builder, loc, shape[i])); +/// Populates the given sizes array for concatenation from type (for static +/// sizes) and from an already-converted opaque pointer source (for dynamic +/// sizes). +static void concatSizesFromInputs(OpBuilder &builder, + SmallVector &sizes, Location loc, + ShapedType dstTp, ValueRange srcs, + unsigned dim) { + auto dstShape = dstTp.getShape(); + + auto srcTp = srcs[0].getType().cast(); + auto srcEnc = getSparseTensorEncoding(srcTp); + // We first fills the sizes from an input tensor, and then + // compute the size of the concatenation dimension if necessary. + if (srcEnc) + // Reuses sizes from an arbitrary input tensor is fine. + sizesFromPtr(builder, sizes, loc, srcEnc, srcTp, srcs[0]); + else + sizesFromSrc(builder, sizes, loc, srcs[0]); + + // Sum up on the `dim` if the dimension is dynamic. + if (dstShape[dim] != ShapedType::kDynamicSize) { + // Faithfully take the static size. + sizes[dim] = constantIndex(builder, loc, dstShape[dim]); + } else { + // Else, compute the shape dynamically. + for (size_t i = 1, sz = srcs.size(); i < sz; i++) { + auto srcTp = srcs[i].getType().cast(); + auto encSrc = getSparseTensorEncoding(srcTp); + Value srcSz = + encSrc ? sizeFromPtrAtDim(builder, loc, encSrc, srcTp, srcs[i], dim) + : linalg::createOrFoldDimOp(builder, loc, srcs[i], dim); + // Sum up all the sizes. 
+ sizes[dim] = builder.create(loc, sizes[dim], srcSz); + } + } } /// Generates an uninitialized temporary buffer of the given size and @@ -195,10 +240,9 @@ /// sparse runtime support library for materializing sparse tensors into the /// computation. static void newParams(OpBuilder &builder, SmallVector ¶ms, - Operation *op, ShapedType stp, + Location loc, ShapedType stp, SparseTensorEncodingAttr &enc, Action action, ValueRange szs, Value ptr = Value()) { - Location loc = op->getLoc(); ArrayRef dlt = enc.getDimLevelType(); unsigned sz = dlt.size(); // Sparsity annotations. @@ -234,6 +278,20 @@ params.push_back(ptr); } +/// Generates the code to read the value from tensor[ivs].The generated code +/// looks like the following and the insertion point after this routine is +/// inside the if-then branch behind the assignment to ind. +/// if (tensor[ivs] != 0) +/// insert_point +static Value genValueForDense(OpBuilder &builder, Location loc, Value tensor, + ValueRange ivs) { + Value val = builder.create(loc, tensor, ivs); + Value cond = genIsNonzero(builder, loc, val); + scf::IfOp ifOp = builder.create(loc, cond, /*else*/ false); + builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); + return val; +} + /// Generates the code to read the value from tensor[ivs], and conditionally /// stores the indices ivs to the memory in ind. 
The generated code looks like /// the following and the insertion point after this routine is inside the @@ -243,10 +301,7 @@ /// ind = ivs static Value genIndexAndValueForDense(OpBuilder &builder, Location loc, Value tensor, Value ind, ValueRange ivs) { - Value val = builder.create(loc, tensor, ivs); - Value cond = genIsNonzero(builder, loc, val); - scf::IfOp ifOp = builder.create(loc, cond, /*else*/ false); - builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); + Value val = genValueForDense(builder, loc, tensor, ivs); unsigned i = 0; for (auto iv : ivs) { Value idx = constantIndex(builder, loc, i++); @@ -256,11 +311,10 @@ } /// Generates a call to release/delete a `SparseTensorCOO`. -static void genDelCOOCall(OpBuilder &builder, Operation *op, Type elemTp, +static void genDelCOOCall(OpBuilder &builder, Location loc, Type elemTp, Value coo) { SmallString<21> name{"delSparseTensorCOO", primaryTypeFunctionSuffix(elemTp)}; - TypeRange noTp; - createFuncCall(builder, op, name, noTp, coo, EmitCInterface::Off); + createFuncCall(builder, loc, name, {}, coo, EmitCInterface::Off); } /// Generates a call that adds one element to a coordinate scheme. @@ -268,25 +322,25 @@ /// val = a[i1,..,ik]; /// if val != 0 /// t->add(&val, [i1,..,ik], [p1,..,pk]); -static void genAddEltCall(OpBuilder &builder, Operation *op, Type eltType, +static void genAddEltCall(OpBuilder &builder, Location loc, Type eltType, Value ptr, Value valPtr, Value ind, Value perm) { SmallString<9> name{"addElt", primaryTypeFunctionSuffix(eltType)}; SmallVector params{ptr, valPtr, ind, perm}; Type pTp = getOpaquePointerType(builder); - createFuncCall(builder, op, name, pTp, params, EmitCInterface::On); + createFuncCall(builder, loc, name, pTp, params, EmitCInterface::On); } /// Generates a call to `iter->getNext()`. If there is a next element, /// then it is copied into the out-parameters `ind` and `elemPtr`, /// and the return value is true. 
If there isn't a next element, then /// the memory for `iter` is freed and the return value is false. -static Value genGetNextCall(OpBuilder &builder, Operation *op, Value iter, +static Value genGetNextCall(OpBuilder &builder, Location loc, Value iter, Value ind, Value elemPtr) { Type elemTp = elemPtr.getType().cast().getElementType(); SmallString<10> name{"getNext", primaryTypeFunctionSuffix(elemTp)}; SmallVector params{iter, ind, elemPtr}; Type i1 = builder.getI1Type(); - return createFuncCall(builder, op, name, i1, params, EmitCInterface::On) + return createFuncCall(builder, loc, name, i1, params, EmitCInterface::On) .getResult(0); } @@ -346,18 +400,43 @@ builder.create(loc, buffer); } -/// Inserts the element returned by genGetNextCall(_, ind, elemPtr) into -/// the tensor created by allocDenseTensor(). The `rank` is the rank -/// of the `tensor` and the length of `ind`. -static void insertScalarIntoDenseTensor(OpBuilder &builder, Location loc, - Value elemPtr, Value tensor, - unsigned rank, Value ind) { +/// Converts a pointer to COO (from calls to iter->next()) into a vector of +/// indices, apply (optional) `offset` on `offsetDim`. +static SmallVector loadIndices(OpBuilder &builder, Location loc, + unsigned rank, Value ind, + unsigned offsetDim = 0, + Value offset = Value()) { SmallVector ivs; ivs.reserve(rank); for (unsigned i = 0; i < rank; i++) { Value idx = constantIndex(builder, loc, i); - ivs.push_back(builder.create(loc, ind, idx)); + idx = builder.create(loc, ind, idx); + if (offsetDim == i && offset) + idx = builder.create(loc, idx, offset); + ivs.push_back(idx); } + return ivs; +} + +/// Converts the vector indices and store it into the memory pointed by +/// `ind`, apply (optional) `offset` on `offsetDim`. 
+static void storeIndices(OpBuilder &builder, Location loc, unsigned rank, + Value ind, ValueRange ivs, unsigned offsetDim = 0, + Value offset = Value()) { + for (unsigned i = 0; i < rank; i++) { + Value idx = ivs[i]; + if (offsetDim == i && offset) + idx = builder.create(loc, idx, offset); + builder.create(loc, idx, ind, + constantIndex(builder, loc, i)); + } +} + +/// Inserts a value stored in `elemPtr` into a dense tensor created by +/// allocDenseTensor(). +static void insertScalarIntoDenseTensor(OpBuilder &builder, Location loc, + Value elemPtr, Value tensor, + ValueRange ivs) { Value elemV = builder.create(loc, elemPtr); builder.create(loc, elemV, tensor, ivs); } @@ -470,16 +549,16 @@ encSrc.getPointerBitWidth(), encSrc.getIndexBitWidth()); SmallVector sizes; SmallVector params; - sizesFromPtr(rewriter, sizes, op, noPerm, srcTp, src); - newParams(rewriter, params, op, srcTp, noPerm, Action::kToIterator, sizes, + sizesFromPtr(rewriter, sizes, loc, noPerm, srcTp, src); + newParams(rewriter, params, loc, srcTp, noPerm, Action::kToIterator, sizes, src); - Value iter = genNewCall(rewriter, op, params); + Value iter = genNewCall(rewriter, loc, params); // Start a new COO for the destination tensor. sizes.clear(); params.clear(); - sizesFromPtr(rewriter, sizes, op, encDst, dstTp, src); - newParams(rewriter, params, op, dstTp, encDst, Action::kEmptyCOO, sizes); - Value coo = genNewCall(rewriter, op, params); + sizesFromPtr(rewriter, sizes, loc, encDst, dstTp, src); + newParams(rewriter, params, loc, dstTp, encDst, Action::kEmptyCOO, sizes); + Value coo = genNewCall(rewriter, loc, params); Value dstPerm = params[2]; // Construct a while loop over the iterator. 
Value srcIdx = genAlloca(rewriter, loc, srcRank, rewriter.getIndexType()); @@ -490,26 +569,116 @@ auto whileOp = rewriter.create(loc, noTypes, noArgs); Block *before = rewriter.createBlock(&whileOp.getBefore(), {}, noTypes); rewriter.setInsertionPointToEnd(before); - Value cond = genGetNextCall(rewriter, op, iter, srcIdx, elemPtr); + Value cond = genGetNextCall(rewriter, loc, iter, srcIdx, elemPtr); rewriter.create(loc, cond, before->getArguments()); // Translate indices from source to target and insert. Note that we do // not need to store the value in elemPtr, as the value is still there. Block *after = rewriter.createBlock(&whileOp.getAfter(), {}, noTypes); rewriter.setInsertionPointToStart(after); translateIndices(loc, rewriter, reassociation, dstTp, srcTp, dstIdx, srcIdx); - genAddEltCall(rewriter, op, elemTp, coo, elemPtr, dstIdx, dstPerm); + genAddEltCall(rewriter, loc, elemTp, coo, elemPtr, dstIdx, dstPerm); rewriter.create(loc); // Final call to construct sparse tensor storage and free temporary resources. rewriter.setInsertionPointAfter(whileOp); params[6] = constantAction(rewriter, loc, Action::kFromCOO); params[7] = coo; - Value dst = genNewCall(rewriter, op, params); - genDelCOOCall(rewriter, op, elemTp, coo); - genDelCOOCall(rewriter, op, elemTp, iter); + Value dst = genNewCall(rewriter, loc, params); + genDelCOOCall(rewriter, loc, elemTp, coo); + genDelCOOCall(rewriter, loc, elemTp, iter); rewriter.replaceOp(op, dst); return success(); } +// Generates a while loop that iterates over the COO list extracted +// from `t`, using `bodyBuilder` to build the loop body. +// while (elem = coo->getNext()) { +// bodyBuilder +// } +// TODO: It can be used by other operators (ReshapeOp, ConvertOP) conversion to +// reduce code repetition! 
+static void genSparseCOOIterationLoop( + ConversionPatternRewriter &rewriter, Location loc, Value t, + RankedTensorType tensorTp, + function_ref bodyBuilder) { + auto enc = getSparseTensorEncoding(tensorTp); + assert(enc && "Generating Sparse Tensor COO Loop on a Dense Tensor!"); + + unsigned rank = tensorTp.getRank(); + Type elemTp = tensorTp.getElementType(); + + // Start an iterator over the tensor (in original index order). + auto noPerm = SparseTensorEncodingAttr::get( + rewriter.getContext(), enc.getDimLevelType(), AffineMap(), + enc.getPointerBitWidth(), enc.getIndexBitWidth()); + SmallVector sizes; + SmallVector params; + sizesFromPtr(rewriter, sizes, loc, noPerm, tensorTp, t); + newParams(rewriter, params, loc, tensorTp, noPerm, Action::kToIterator, sizes, + t); + Value iter = genNewCall(rewriter, loc, params); + + // Construct a while loop over the iterator. + Value srcIdx = genAlloca(rewriter, loc, rank, rewriter.getIndexType()); + Value elemPtr = genAllocaScalar(rewriter, loc, elemTp); + SmallVector noArgs; + SmallVector noTypes; + auto whileOp = rewriter.create(loc, noTypes, noArgs); + Block *before = rewriter.createBlock(&whileOp.getBefore(), {}, noTypes); + rewriter.setInsertionPointToEnd(before); + Value cond = genGetNextCall(rewriter, loc, iter, srcIdx, elemPtr); + rewriter.create(loc, cond, before->getArguments()); + Block *after = rewriter.createBlock(&whileOp.getAfter(), {}, noTypes); + rewriter.setInsertionPointToStart(after); + // Callback here to build loop body. + bodyBuilder(rewriter, loc, srcIdx, elemPtr); + rewriter.create(loc); + // Finish generating loop. + rewriter.setInsertionPointAfter(whileOp); + + // Free memory for iterator. + genDelCOOCall(rewriter, loc, elemTp, iter); +} + +// Generate loop that iterates over a dense tensor. +// for i1 in dim1 +// .. 
+// for ik in dimk +// val = a[i1,..,ik] +// if val != 0 +// bodyBuilder(v, [i1, ..., ik]) +// TODO: It can be used by other operators (ReshapeOp, ConvertOP) conversion to +// reduce code repetition! +static void genDenseTensorIterationLoop( + ConversionPatternRewriter &rewriter, Location loc, Value t, + RankedTensorType tensorTp, + function_ref bodyBuilder) { + auto enc = getSparseTensorEncoding(tensorTp); + assert(!enc && "Generating Densor Tensor Loop on a Sparse Tensor!"); + + unsigned rank = tensorTp.getRank(); + Value zero = constantIndex(rewriter, loc, 0); + Value one = constantIndex(rewriter, loc, 1); + + SmallVector lo; + SmallVector hi; + SmallVector st; + + // Fill out loop iteration information. + for (unsigned i = 0; i < rank; i++) { + lo.push_back(zero); + hi.push_back(linalg::createOrFoldDimOp(rewriter, loc, t, i)); + st.push_back(one); + } + + scf::buildLoopNest(rewriter, loc, lo, hi, st, {}, + [&](OpBuilder &builder, Location loc, ValueRange ivs, + ValueRange args) -> scf::ValueVector { + // Invoke callback to build the body of the loop. + bodyBuilder(builder, loc, ivs); + return {}; + }); +} + //===----------------------------------------------------------------------===// // Conversion rules. //===----------------------------------------------------------------------===// @@ -544,7 +713,8 @@ // Generate the call. 
Value src = adaptor.getOperands()[0]; int64_t idx = *index; - rewriter.replaceOp(op, genDimSizeCall(rewriter, op, enc, src, idx)); + rewriter.replaceOp(op, + genDimSizeCall(rewriter, op->getLoc(), enc, src, idx)); return success(); } }; @@ -594,6 +764,7 @@ LogicalResult matchAndRewrite(NewOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { + Location loc = op.getLoc(); Type resType = op.getType(); auto enc = getSparseTensorEncoding(resType); if (!enc) @@ -603,10 +774,10 @@ SmallVector sizes; SmallVector params; ShapedType stp = resType.cast(); - sizesFromType(rewriter, sizes, op.getLoc(), stp); + sizesFromType(rewriter, sizes, loc, stp); Value ptr = adaptor.getOperands()[0]; - newParams(rewriter, params, op, stp, enc, Action::kFromFile, sizes, ptr); - rewriter.replaceOp(op, genNewCall(rewriter, op, params)); + newParams(rewriter, params, loc, stp, enc, Action::kFromFile, sizes, ptr); + rewriter.replaceOp(op, genNewCall(rewriter, loc, params)); return success(); } }; @@ -622,6 +793,7 @@ if (op.getCopy()) return rewriter.notifyMatchFailure(op, "sparse tensor copy not implemented"); + Location loc = op.getLoc(); RankedTensorType resType = op.getType(); auto enc = getSparseTensorEncoding(resType); if (!enc) @@ -633,16 +805,16 @@ if (resType.isDynamicDim(i)) { sizes.push_back(adaptor.getOperands()[operandCtr++]); } else { - sizes.push_back(rewriter.create( - op.getLoc(), op.getStaticSize(i))); + sizes.push_back( + rewriter.create(loc, op.getStaticSize(i))); } } // Generate the call to construct empty tensor. The sizes are // explicitly defined by the arguments to the alloc operator. 
SmallVector params; ShapedType stp = resType.cast(); - newParams(rewriter, params, op, stp, enc, Action::kEmpty, sizes); - rewriter.replaceOp(op, genNewCall(rewriter, op, params)); + newParams(rewriter, params, loc, stp, enc, Action::kEmpty, sizes); + rewriter.replaceOp(op, genNewCall(rewriter, loc, params)); return success(); } }; @@ -681,7 +853,7 @@ SmallVector sizes; SmallVector params; ShapedType stp = srcType.cast(); - sizesFromPtr(rewriter, sizes, op, encSrc, stp, src); + sizesFromPtr(rewriter, sizes, loc, encSrc, stp, src); bool useDirectConversion; switch (options.sparseToSparseStrategy) { case SparseToSparseConversionStrategy::kViaCOO: @@ -697,9 +869,9 @@ break; } if (useDirectConversion) { - newParams(rewriter, params, op, stp, encDst, Action::kSparseToSparse, + newParams(rewriter, params, loc, stp, encDst, Action::kSparseToSparse, sizes, src); - rewriter.replaceOp(op, genNewCall(rewriter, op, params)); + rewriter.replaceOp(op, genNewCall(rewriter, loc, params)); } else { // use via-COO conversion. 
// Set up encoding with right mix of src and dst so that the two // method calls can share most parameters, while still providing @@ -707,14 +879,14 @@ auto enc = SparseTensorEncodingAttr::get( op->getContext(), encDst.getDimLevelType(), encDst.getDimOrdering(), encSrc.getPointerBitWidth(), encSrc.getIndexBitWidth()); - newParams(rewriter, params, op, stp, enc, Action::kToCOO, sizes, src); - Value coo = genNewCall(rewriter, op, params); + newParams(rewriter, params, loc, stp, enc, Action::kToCOO, sizes, src); + Value coo = genNewCall(rewriter, loc, params); params[3] = constantPointerTypeEncoding(rewriter, loc, encDst); params[4] = constantIndexTypeEncoding(rewriter, loc, encDst); params[6] = constantAction(rewriter, loc, Action::kFromCOO); params[7] = coo; - Value dst = genNewCall(rewriter, op, params); - genDelCOOCall(rewriter, op, stp.getElementType(), coo); + Value dst = genNewCall(rewriter, loc, params); + genDelCOOCall(rewriter, loc, stp.getElementType(), coo); rewriter.replaceOp(op, dst); } return success(); @@ -741,10 +913,10 @@ AffineMap(), encSrc.getPointerBitWidth(), encSrc.getIndexBitWidth()); SmallVector sizes; SmallVector params; - sizesFromPtr(rewriter, sizes, op, encSrc, srcTensorTp, src); - newParams(rewriter, params, op, dstTensorTp, encDst, Action::kToIterator, + sizesFromPtr(rewriter, sizes, loc, encSrc, srcTensorTp, src); + newParams(rewriter, params, loc, dstTensorTp, encDst, Action::kToIterator, sizes, src); - Value iter = genNewCall(rewriter, op, params); + Value iter = genNewCall(rewriter, loc, params); Value ind = genAlloca(rewriter, loc, rank, rewriter.getIndexType()); Value elemPtr = genAllocaScalar(rewriter, loc, elemTp); Block *insertionBlock = rewriter.getInsertionBlock(); @@ -756,14 +928,15 @@ auto whileOp = rewriter.create(loc, noTypes, noArgs); Block *before = rewriter.createBlock(&whileOp.getBefore(), {}, noTypes); rewriter.setInsertionPointToEnd(before); - Value cond = genGetNextCall(rewriter, op, iter, ind, elemPtr); + Value 
cond = genGetNextCall(rewriter, loc, iter, ind, elemPtr); rewriter.create(loc, cond, before->getArguments()); Block *after = rewriter.createBlock(&whileOp.getAfter(), {}, noTypes); rewriter.setInsertionPointToStart(after); - insertScalarIntoDenseTensor(rewriter, loc, elemPtr, dst, rank, ind); + SmallVector ivs = loadIndices(rewriter, loc, rank, ind); + insertScalarIntoDenseTensor(rewriter, loc, elemPtr, dst, ivs); rewriter.create(loc); rewriter.setInsertionPointAfter(whileOp); - genDelCOOCall(rewriter, op, elemTp, iter); + genDelCOOCall(rewriter, loc, elemTp, iter); rewriter.replaceOpWithNewOp(op, resType, dst); // Deallocate the buffer. if (bufferization::allocationDoesNotEscape(op->getOpResult(0))) { @@ -807,8 +980,8 @@ SmallVector sizes; SmallVector params; sizesFromSrc(rewriter, sizes, loc, src); - newParams(rewriter, params, op, stp, encDst, Action::kEmptyCOO, sizes); - Value coo = genNewCall(rewriter, op, params); + newParams(rewriter, params, loc, stp, encDst, Action::kEmptyCOO, sizes); + Value coo = genNewCall(rewriter, loc, params); Value ind = genAlloca(rewriter, loc, rank, rewriter.getIndexType()); Value perm = params[2]; SmallVector lo; @@ -846,14 +1019,14 @@ else val = genIndexAndValueForDense(rewriter, loc, src, ind, ivs); builder.create(loc, val, elemPtr); - genAddEltCall(rewriter, op, eltType, coo, elemPtr, ind, perm); + genAddEltCall(rewriter, loc, eltType, coo, elemPtr, ind, perm); return {}; }); // Final call to construct sparse tensor storage. 
params[6] = constantAction(rewriter, loc, Action::kFromCOO); params[7] = coo; - Value dst = genNewCall(rewriter, op, params); - genDelCOOCall(rewriter, op, eltType, coo); + Value dst = genNewCall(rewriter, loc, params); + genDelCOOCall(rewriter, loc, eltType, coo); rewriter.replaceOp(op, dst); return success(); } @@ -875,8 +1048,7 @@ if (!enc) return failure(); StringRef name = "delSparseTensor"; - TypeRange noTp; - createFuncCall(rewriter, op, name, noTp, adaptor.getOperands(), + createFuncCall(rewriter, op->getLoc(), name, {}, adaptor.getOperands(), EmitCInterface::Off); rewriter.eraseOp(op); return success(); @@ -942,8 +1114,7 @@ if (op.getHasInserts()) { // Finalize any pending insertions. StringRef name = "endInsert"; - TypeRange noTp; - createFuncCall(rewriter, op, name, noTp, adaptor.getOperands(), + createFuncCall(rewriter, op->getLoc(), name, {}, adaptor.getOperands(), EmitCInterface::Off); } rewriter.replaceOp(op, adaptor.getOperands()); @@ -960,8 +1131,7 @@ ConversionPatternRewriter &rewriter) const override { Type elemTp = op.getTensor().getType().cast().getElementType(); SmallString<12> name{"lexInsert", primaryTypeFunctionSuffix(elemTp)}; - TypeRange noTp; - replaceOpWithFuncCall(rewriter, op, name, noTp, adaptor.getOperands(), + replaceOpWithFuncCall(rewriter, op, name, {}, adaptor.getOperands(), EmitCInterface::On); return success(); } @@ -984,7 +1154,7 @@ // Determine the size for access expansion. auto enc = getSparseTensorEncoding(srcType); Value src = adaptor.getOperands()[0]; - Value sz = genDimSizeCall(rewriter, op, enc, src, srcType.getRank() - 1); + Value sz = genDimSizeCall(rewriter, loc, enc, src, srcType.getRank() - 1); // Allocate temporary buffers for values, filled-switch, and indices. // We do not use stack buffers for this, since the expanded size may // be rather large (as it envelops a single expanded dense dimension). @@ -1024,8 +1194,7 @@ // access pattern. 
Type elemTp = op.getTensor().getType().cast().getElementType(); SmallString<12> name{"expInsert", primaryTypeFunctionSuffix(elemTp)}; - TypeRange noTp; - replaceOpWithFuncCall(rewriter, op, name, noTp, adaptor.getOperands(), + replaceOpWithFuncCall(rewriter, op, name, {}, adaptor.getOperands(), EmitCInterface::On); // Deallocate the buffers on exit of the loop nest. Operation *parent = op; @@ -1043,6 +1212,139 @@ } }; +/// Sparse conversion rule for the concatenate operator. +class SparseTensorConcatConverter : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + LogicalResult + matchAndRewrite(ConcatenateOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + // The conversion works as follow: + // (1). When output is sparse, and mix of inputs: + // a_sparse = concat (b_dense, c_sparse, ....) + // => + // coo_for_a = newSparseCOO(shapeOf(a)) + // for i, j, k // dense input + // coo->add(adjustForOffset(i,j,k), b[i,j,k]) + // + // for elem in sparse_input + // coo->add(adjustForOffset(elem.indices), elem.value) + // ... + // a = newSparseTensor(coo_for_a) + // return a + // + // (2). When output is dense, and mix of inputs: + // a_dense = concat (b_dense, c_sparse, ....) 
+ // => + // a = malloc(shapeOf(a)) + // for i, j, k // dense input + // a[ adjustForOffset(i,j,k) ] = b[i,j,k] + // + // for elem in sparse_input + // a[ adjustForOffset(elem.indices) ] = elem.value + // return a + Location loc = op.getLoc(); + auto dstTp = op.getType().cast(); + auto encDst = getSparseTensorEncoding(dstTp); + Type elemTp = dstTp.getElementType(); + uint64_t concatDim = op.getDimension().getZExtValue(); + unsigned rank = dstTp.getRank(); + + Value dst; // destination tensor + Value dstPerm; // destination tensor permutation (if sparse out) + // A pointer to the value being inserted (if dense => sparse) + Value elemPtr; + // Memory that holds the COO for destination tensor (if sparse out) + Value dstIdx; + // The offset applied to the dimenstion to be concated (starting from 0) + Value offset = constantIndex(rewriter, loc, 0); + + SmallVector sizes; + SmallVector params; + concatSizesFromInputs(rewriter, sizes, loc, dstTp, op.getInputs(), + concatDim); + + if (encDst) { + // Start a new COO for the destination tensor. + newParams(rewriter, params, loc, dstTp, encDst, Action::kEmptyCOO, sizes); + dst = genNewCall(rewriter, loc, params); + dstPerm = params[2]; + elemPtr = genAllocaScalar(rewriter, loc, elemTp); + dstIdx = genAlloca(rewriter, loc, rank, rewriter.getIndexType()); + } else { + // TODO: Dense buffers should be allocated/deallocated via the callback + // in BufferizationOptions. 
+ dst = allocDenseTensor(rewriter, loc, dstTp, sizes); + } + for (auto it : llvm::zip(op.getInputs(), adaptor.getInputs())) { + Value orignalOp = std::get<0>(it); // Input (with encoding) from Op + Value adaptedOp = std::get<1>(it); // Input (type converted) from adaptor + RankedTensorType srcTp = orignalOp.getType().cast(); + auto encSrc = getSparseTensorEncoding(srcTp); + if (encSrc) { + genSparseCOOIterationLoop( + rewriter, loc, adaptedOp, srcTp, + [&](OpBuilder &builder, Location loc, Value idx, + Value elemPtr) -> void { + auto indVec = + loadIndices(builder, loc, rank, idx, concatDim, offset); + if (encDst) { + // Case: sparse => sparse + storeIndices(builder, loc, rank, dstIdx, indVec); + genAddEltCall(builder, loc, elemTp, dst, elemPtr, dstIdx, + dstPerm); + } else { + // Case: sparse => dense + insertScalarIntoDenseTensor(builder, loc, elemPtr, dst, indVec); + } + }); + } else { + genDenseTensorIterationLoop( + rewriter, loc, adaptedOp, srcTp, + [&](OpBuilder &builder, Location loc, ValueRange idx) -> void { + if (encDst) { + // Case: dense => sparse + storeIndices(builder, loc, rank, dstIdx, idx, concatDim, + offset); + Value val = genValueForDense(builder, loc, adaptedOp, idx); + builder.create(loc, val, elemPtr); + genAddEltCall(builder, loc, elemTp, dst, elemPtr, dstIdx, + dstPerm); + } else { + // Case: dense => dense + Value val = genValueForDense(builder, loc, adaptedOp, idx); + SmallVector indVec(idx); + // Apply offset. + indVec[concatDim] = builder.create( + loc, indVec[concatDim], offset); + builder.create(loc, val, dst, indVec); + } + }); + } + // Accumulate offset. + // TODO: avoid calling sparseDimSize multiple times by caching the result! + Value curDim = encSrc ? 
sizeFromPtrAtDim(rewriter, loc, encSrc, srcTp, + adaptedOp, concatDim) + : linalg::createOrFoldDimOp(rewriter, loc, + adaptedOp, concatDim); + + offset = rewriter.create(loc, offset, curDim); + } + if (encDst) { + params[6] = constantAction(rewriter, loc, Action::kFromCOO); + // In sparse output case, the destination holds the COO. + Value coo = dst; + params[7] = coo; + dst = genNewCall(rewriter, loc, params); + // Release resources. + genDelCOOCall(rewriter, loc, elemTp, coo); + rewriter.replaceOp(op, dst); + } else { + rewriter.replaceOpWithNewOp(op, dstTp, dst); + } + return success(); + } +}; /// Sparse conversion rule for the output operator. class SparseTensorOutConverter : public OpConversionPattern { public: @@ -1057,12 +1359,12 @@ auto encSrc = getSparseTensorEncoding(srcType); SmallVector sizes; SmallVector params; - sizesFromPtr(rewriter, sizes, op, encSrc, srcType, src); + sizesFromPtr(rewriter, sizes, loc, encSrc, srcType, src); auto enc = SparseTensorEncodingAttr::get( op->getContext(), encSrc.getDimLevelType(), AffineMap(), encSrc.getPointerBitWidth(), encSrc.getIndexBitWidth()); - newParams(rewriter, params, op, srcType, enc, Action::kToCOO, sizes, src); - Value coo = genNewCall(rewriter, op, params); + newParams(rewriter, params, loc, srcType, enc, Action::kToCOO, sizes, src); + Value coo = genNewCall(rewriter, loc, params); // Then output the tensor to external file with indices in the externally // visible lexicographic index order. 
A sort is required if the source was // not in that order yet (note that the sort can be dropped altogether if @@ -1076,9 +1378,8 @@ params.push_back(constantI1(rewriter, loc, sort)); Type eltType = srcType.getElementType(); SmallString<18> name{"outSparseTensor", primaryTypeFunctionSuffix(eltType)}; - TypeRange noTp; - createFuncCall(rewriter, op, name, noTp, params, EmitCInterface::Off); - genDelCOOCall(rewriter, op, eltType, coo); + createFuncCall(rewriter, loc, name, {}, params, EmitCInterface::Off); + genDelCOOCall(rewriter, loc, eltType, coo); rewriter.eraseOp(op); return success(); } @@ -1099,12 +1400,13 @@ SparseCastConverter, SparseTensorNewConverter, SparseReshapeConverter, SparseReshapeConverter, - SparseTensorAllocConverter, SparseTensorDeallocConverter, - SparseTensorToPointersConverter, SparseTensorToIndicesConverter, - SparseTensorToValuesConverter, SparseTensorLoadConverter, - SparseTensorLexInsertConverter, SparseTensorExpandConverter, - SparseTensorCompressConverter, SparseTensorOutConverter>( - typeConverter, patterns.getContext()); + SparseTensorConcatConverter, SparseTensorAllocConverter, + SparseTensorDeallocConverter, SparseTensorToPointersConverter, + SparseTensorToIndicesConverter, SparseTensorToValuesConverter, + SparseTensorLoadConverter, SparseTensorLexInsertConverter, + SparseTensorExpandConverter, SparseTensorCompressConverter, + SparseTensorOutConverter>(typeConverter, patterns.getContext()); + patterns.add(typeConverter, patterns.getContext(), options); } diff --git a/mlir/lib/Dialect/Tosa/Utils/ConversionUtils.cpp b/mlir/lib/Dialect/Tosa/Utils/ConversionUtils.cpp --- a/mlir/lib/Dialect/Tosa/Utils/ConversionUtils.cpp +++ b/mlir/lib/Dialect/Tosa/Utils/ConversionUtils.cpp @@ -32,8 +32,8 @@ Value mlir::tosa::clampFloatHelper(Location loc, Value arg, arith::ConstantOp min, arith::ConstantOp max, OpBuilder &rewriter) { - Value minValue = rewriter.create(loc, arg, min); - return rewriter.create(loc, minValue, max); + Value minValue = 
rewriter.create(loc, arg, max); + return rewriter.create(loc, minValue, min); } Value mlir::tosa::clampIntHelper(Location loc, Value arg, arith::ConstantOp min, diff --git a/mlir/lib/Target/SPIRV/Deserialization/DeserializeOps.cpp b/mlir/lib/Target/SPIRV/Deserialization/DeserializeOps.cpp --- a/mlir/lib/Target/SPIRV/Deserialization/DeserializeOps.cpp +++ b/mlir/lib/Target/SPIRV/Deserialization/DeserializeOps.cpp @@ -168,6 +168,8 @@ return processType(opcode, operands); case spirv::Opcode::OpTypeForwardPointer: return processTypeForwardPointer(operands); + case spirv::Opcode::OpTypeJointMatrixINTEL: + return processType(opcode, operands); case spirv::Opcode::OpConstant: return processConstant(operands, /*isSpec=*/false); case spirv::Opcode::OpSpecConstant: diff --git a/mlir/lib/Target/SPIRV/Deserialization/Deserializer.h b/mlir/lib/Target/SPIRV/Deserialization/Deserializer.h --- a/mlir/lib/Target/SPIRV/Deserialization/Deserializer.h +++ b/mlir/lib/Target/SPIRV/Deserialization/Deserializer.h @@ -257,6 +257,8 @@ LogicalResult processFunctionType(ArrayRef operands); + LogicalResult processJointMatrixType(ArrayRef operands); + LogicalResult processImageType(ArrayRef operands); LogicalResult processSampledImageType(ArrayRef operands); diff --git a/mlir/lib/Target/SPIRV/Deserialization/Deserializer.cpp b/mlir/lib/Target/SPIRV/Deserialization/Deserializer.cpp --- a/mlir/lib/Target/SPIRV/Deserialization/Deserializer.cpp +++ b/mlir/lib/Target/SPIRV/Deserialization/Deserializer.cpp @@ -730,6 +730,8 @@ return processCooperativeMatrixType(operands); case spirv::Opcode::OpTypeFunction: return processFunctionType(operands); + case spirv::Opcode::OpTypeJointMatrixINTEL: + return processJointMatrixType(operands); case spirv::Opcode::OpTypeImage: return processImageType(operands); case spirv::Opcode::OpTypeSampledImage: @@ -888,6 +890,40 @@ return success(); } +LogicalResult +spirv::Deserializer::processJointMatrixType(ArrayRef operands) { + if (operands.size() != 6) { + return 
emitError(unknownLoc, "OpTypeJointMatrix must have element " "type and row x column parameters"); + } + + Type elementTy = getType(operands[1]); + if (!elementTy) { + return emitError(unknownLoc, "OpTypeJointMatrix references undefined ") + << operands[1]; + } + + auto scope = spirv::symbolizeScope(getConstantInt(operands[5]).getInt()); + if (!scope) { + return emitError(unknownLoc, + "OpTypeJointMatrix references undefined scope ") + << operands[5]; + } + auto matrixLayout = + spirv::symbolizeMatrixLayout(getConstantInt(operands[4]).getInt()); + if (!matrixLayout) { + return emitError(unknownLoc, + "OpTypeJointMatrix references undefined matrix layout ") + << operands[4]; + } + unsigned rows = getConstantInt(operands[2]).getInt(); + unsigned columns = getConstantInt(operands[3]).getInt(); + + typeMap[operands[0]] = spirv::JointMatrixINTELType::get( + elementTy, scope.value(), rows, columns, matrixLayout.value()); + return success(); +} + LogicalResult spirv::Deserializer::processRuntimeArrayType(ArrayRef operands) { if (operands.size() != 2) { diff --git a/mlir/lib/Target/SPIRV/Serialization/Serializer.cpp b/mlir/lib/Target/SPIRV/Serialization/Serializer.cpp --- a/mlir/lib/Target/SPIRV/Serialization/Serializer.cpp +++ b/mlir/lib/Target/SPIRV/Serialization/Serializer.cpp @@ -598,6 +598,27 @@ return success(); } + if (auto jointMatrixType = type.dyn_cast()) { + uint32_t elementTypeID = 0; + if (failed(processTypeImpl(loc, jointMatrixType.getElementType(), + elementTypeID, serializationCtx))) { + return failure(); + } + typeEnum = spirv::Opcode::OpTypeJointMatrixINTEL; + auto getConstantOp = [&](uint32_t id) { + auto attr = IntegerAttr::get(IntegerType::get(type.getContext(), 32), id); + return prepareConstantInt(loc, attr); + }; + operands.push_back(elementTypeID); + operands.push_back(getConstantOp(jointMatrixType.getRows())); + operands.push_back(getConstantOp(jointMatrixType.getColumns())); + operands.push_back(getConstantOp( + 
static_cast(jointMatrixType.getMatrixLayout()))); + operands.push_back( + getConstantOp(static_cast(jointMatrixType.getScope()))); + return success(); + } + if (auto matrixType = type.dyn_cast()) { uint32_t elementTypeID = 0; if (failed(processTypeImpl(loc, matrixType.getColumnType(), elementTypeID, diff --git a/mlir/lib/Tools/lsp-server-support/Protocol.cpp b/mlir/lib/Tools/lsp-server-support/Protocol.cpp --- a/mlir/lib/Tools/lsp-server-support/Protocol.cpp +++ b/mlir/lib/Tools/lsp-server-support/Protocol.cpp @@ -121,7 +121,7 @@ return false; if (!llvm::isAlpha(scheme[0])) return false; - return std::all_of(scheme.begin() + 1, scheme.end(), [](char c) { + return llvm::all_of(llvm::drop_begin(scheme), [](char c) { return llvm::isAlnum(c) || c == '+' || c == '.' || c == '-'; }); } diff --git a/mlir/lib/Transforms/Utils/TopologicalSortUtils.cpp b/mlir/lib/Transforms/Utils/TopologicalSortUtils.cpp --- a/mlir/lib/Transforms/Utils/TopologicalSortUtils.cpp +++ b/mlir/lib/Transforms/Utils/TopologicalSortUtils.cpp @@ -8,29 +8,19 @@ #include "mlir/Transforms/TopologicalSortUtils.h" #include "mlir/IR/OpDefinition.h" +#include "llvm/ADT/SetVector.h" using namespace mlir; -bool mlir::sortTopologically( - Block *block, llvm::iterator_range ops, - function_ref isOperandReady) { - if (ops.empty()) - return true; - - // The set of operations that have not yet been scheduled. - DenseSet unscheduledOps; - // Mark all operations as unscheduled. - for (Operation &op : ops) - unscheduledOps.insert(&op); - - Block::iterator nextScheduledOp = ops.begin(); - Block::iterator end = ops.end(); - +/// Return `true` if the given operation is ready to be scheduled. +static bool isOpReady(Block *block, Operation *op, + DenseSet &unscheduledOps, + function_ref isOperandReady) { // An operation is ready to be scheduled if all its operands are ready. 
An // operation is ready if: const auto isReady = [&](Value value, Operation *top) { // - the user-provided callback marks it as ready, - if (isOperandReady && isOperandReady(value, top)) + if (isOperandReady && isOperandReady(value, op)) return true; Operation *parent = value.getDefiningOp(); // - it is a block argument, @@ -41,12 +31,38 @@ if (!ancestor) return true; // - it is defined in a nested region, or - if (ancestor == top) + if (ancestor == op) return true; // - its ancestor in the block is scheduled. return !unscheduledOps.contains(ancestor); }; + // An operation is recursively ready to be scheduled if it and its nested + // operations are ready. + WalkResult readyToSchedule = op->walk([&](Operation *nestedOp) { + return llvm::all_of(nestedOp->getOperands(), + [&](Value operand) { return isReady(operand, op); }) + ? WalkResult::advance() + : WalkResult::interrupt(); + }); + return !readyToSchedule.wasInterrupted(); +} + +bool mlir::sortTopologically( + Block *block, llvm::iterator_range ops, + function_ref isOperandReady) { + if (ops.empty()) + return true; + + // The set of operations that have not yet been scheduled. + DenseSet unscheduledOps; + // Mark all operations as unscheduled. + for (Operation &op : ops) + unscheduledOps.insert(&op); + + Block::iterator nextScheduledOp = ops.begin(); + Block::iterator end = ops.end(); + bool allOpsScheduled = true; while (!unscheduledOps.empty()) { bool scheduledAtLeastOnce = false; @@ -56,16 +72,7 @@ // set, and "schedule" it (move it before the `nextScheduledOp`). for (Operation &op : llvm::make_early_inc_range(llvm::make_range(nextScheduledOp, end))) { - // An operation is recursively ready to be scheduled of it and its nested - // operations are ready. - WalkResult readyToSchedule = op.walk([&](Operation *nestedOp) { - return llvm::all_of( - nestedOp->getOperands(), - [&](Value operand) { return isReady(operand, &op); }) - ? 
WalkResult::advance() - : WalkResult::interrupt(); - }); - if (readyToSchedule.wasInterrupted()) + if (!isOpReady(block, &op, unscheduledOps, isOperandReady)) continue; // Schedule the operation by moving it to the start. @@ -96,3 +103,48 @@ isOperandReady); return sortTopologically(block, *block, isOperandReady); } + +bool mlir::computeTopologicalSorting( + Block *block, MutableArrayRef ops, + function_ref isOperandReady) { + if (ops.empty()) + return true; + + // The set of operations that have not yet been scheduled. + DenseSet unscheduledOps; + + // Mark all operations as unscheduled. + for (Operation *op : ops) { + assert(op->getBlock() == block && "op must belong to block"); + unscheduledOps.insert(op); + } + + unsigned nextScheduledOp = 0; + + bool allOpsScheduled = true; + while (!unscheduledOps.empty()) { + bool scheduledAtLeastOnce = false; + + // Loop over the ops that are not sorted yet, try to find the ones "ready", + // i.e. the ones for which there aren't any operand produced by an op in the + // set, and "schedule" it (swap it with the op at `nextScheduledOp`). + for (unsigned i = nextScheduledOp; i < ops.size(); ++i) { + if (!isOpReady(block, ops[i], unscheduledOps, isOperandReady)) + continue; + + // Schedule the operation by moving it to the start. + unscheduledOps.erase(ops[i]); + std::swap(ops[i], ops[nextScheduledOp]); + scheduledAtLeastOnce = true; + ++nextScheduledOp; + } + + // If no operations were scheduled, just schedule the first op and continue. 
+ if (!scheduledAtLeastOnce) { + allOpsScheduled = false; + unscheduledOps.erase(ops[nextScheduledOp++]); + } + } + + return allOpsScheduled; +} diff --git a/mlir/python/mlir/dialects/_structured_transform_ops_ext.py b/mlir/python/mlir/dialects/_structured_transform_ops_ext.py --- a/mlir/python/mlir/dialects/_structured_transform_ops_ext.py +++ b/mlir/python/mlir/dialects/_structured_transform_ops_ext.py @@ -110,6 +110,24 @@ ip=ip) + +class MatchOp: + """Specialization for MatchOp class.""" + + @classmethod + def match_op_names(MatchOp, + target: Union[Operation, Value], + names: Sequence[str], + loc=None, + ip=None): + pdl_operation_type = pdl.OperationType.get() + return MatchOp( + pdl_operation_type, + _get_op_result_or_value(target), + ops=ArrayAttr.get(list(map(lambda s: StringAttr.get(s), names))), + loc=loc, + ip=ip) + + class MultiTileSizesOp: """Specialization for MultitileSizesOp class.""" diff --git a/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir b/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir --- a/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir +++ b/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s --convert-complex-to-standard --split-input-file |\ -// RUN: FileCheck %s +// RUN: FileCheck %s // CHECK-LABEL: func @complex_abs // CHECK-SAME: %[[ARG:.*]]: complex @@ -262,21 +262,21 @@ %log1p = complex.log1p %arg: complex return %log1p : complex } + // CHECK: %[[REAL:.*]] = complex.re %[[ARG]] : complex // CHECK: %[[IMAG:.*]] = complex.im %[[ARG]] : complex +// CHECK: %[[ONE_HALF:.*]] = arith.constant 5.000000e-01 : f32 // CHECK: %[[ONE:.*]] = arith.constant 1.000000e+00 : f32 +// CHECK: %[[TWO:.*]] = arith.constant 2.000000e+00 : f32 +// CHECK: %[[SQ_SUM_0:.*]] = arith.mulf %[[REAL]], %[[REAL]] : f32 +// CHECK: %[[TWO_REAL:.*]] = arith.mulf %[[REAL]], %[[TWO]] : f32 +// CHECK: %[[SQ_SUM_1:.*]] = arith.addf %[[SQ_SUM_0]], %[[TWO_REAL]] 
: f32 +// CHECK: %[[SQ_IMAG:.*]] = arith.mulf %[[IMAG]], %[[IMAG]] : f32 +// CHECK: %[[SQ_SUM_2:.*]] = arith.addf %[[SQ_SUM_1]], %[[SQ_IMAG]] : f32 +// CHECK: %[[LOG_SQ_SUM:.*]] = math.log1p %[[SQ_SUM_2]] : f32 +// CHECK: %[[RESULT_REAL:.*]] = arith.mulf %[[LOG_SQ_SUM]], %[[ONE_HALF]] : f32 // CHECK: %[[REAL_PLUS_ONE:.*]] = arith.addf %[[REAL]], %[[ONE]] : f32 -// CHECK: %[[NEW_COMPLEX:.*]] = complex.create %[[REAL_PLUS_ONE]], %[[IMAG]] : complex -// CHECK: %[[REAL:.*]] = complex.re %[[NEW_COMPLEX]] : complex -// CHECK: %[[IMAG:.*]] = complex.im %[[NEW_COMPLEX]] : complex -// CHECK: %[[SQR_REAL:.*]] = arith.mulf %[[REAL]], %[[REAL]] : f32 -// CHECK: %[[SQR_IMAG:.*]] = arith.mulf %[[IMAG]], %[[IMAG]] : f32 -// CHECK: %[[SQ_NORM:.*]] = arith.addf %[[SQR_REAL]], %[[SQR_IMAG]] : f32 -// CHECK: %[[NORM:.*]] = math.sqrt %[[SQ_NORM]] : f32 -// CHECK: %[[RESULT_REAL:.*]] = math.log %[[NORM]] : f32 -// CHECK: %[[REAL2:.*]] = complex.re %[[NEW_COMPLEX]] : complex -// CHECK: %[[IMAG2:.*]] = complex.im %[[NEW_COMPLEX]] : complex -// CHECK: %[[RESULT_IMAG:.*]] = math.atan2 %[[IMAG2]], %[[REAL2]] : f32 +// CHECK: %[[RESULT_IMAG:.*]] = math.atan2 %[[IMAG]], %[[REAL_PLUS_ONE]] : f32 // CHECK: %[[RESULT:.*]] = complex.create %[[RESULT_REAL]], %[[RESULT_IMAG]] : complex // CHECK: return %[[RESULT]] : complex diff --git a/mlir/test/Conversion/MathToLibm/convert-to-libm.mlir b/mlir/test/Conversion/MathToLibm/convert-to-libm.mlir --- a/mlir/test/Conversion/MathToLibm/convert-to-libm.mlir +++ b/mlir/test/Conversion/MathToLibm/convert-to-libm.mlir @@ -303,3 +303,15 @@ %double_result = math.tan %double : vector<2xf64> return %float_result, %double_result : vector<2xf32>, vector<2xf64> } + +// CHECK-LABEL: func @log1p_caller +// CHECK-SAME: %[[FLOAT:.*]]: f32 +// CHECK-SAME: %[[DOUBLE:.*]]: f64 +func.func @log1p_caller(%float: f32, %double: f64) -> (f32, f64) { + // CHECK-DAG: %[[FLOAT_RESULT:.*]] = call @log1pf(%[[FLOAT]]) : (f32) -> f32 + %float_result = math.log1p %float : f32 + // 
CHECK-DAG: %[[DOUBLE_RESULT:.*]] = call @log1p(%[[DOUBLE]]) : (f64) -> f64 + %double_result = math.log1p %double : f64 + // CHECK: return %[[FLOAT_RESULT]], %[[DOUBLE_RESULT]] + return %float_result, %double_result : f32, f64 +} \ No newline at end of file diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir @@ -467,8 +467,8 @@ // CHECK: ^bb0(%[[ARG1:.+]]: f16, // CHECK-DAG: %[[C0:.+]] = arith.constant 0.0 // CHECK-DAG: %[[C6:.+]] = arith.constant 6.0 - // CHECK-DAG: %[[MIN:.+]] = arith.minf %[[ARG1]], %[[C0]] - // CHECK-DAG: %[[MAX:.+]] = arith.maxf %[[MIN]], %[[C6]] + // CHECK-DAG: %[[MIN:.+]] = arith.minf %[[ARG1]], %[[C6]] + // CHECK-DAG: %[[MAX:.+]] = arith.maxf %[[MIN]], %[[C0]] %0 = "tosa.clamp"(%arg0) {min_int = 0 : i64, max_int = 0 : i64, min_fp = 0.0 : f32, max_fp = 6.0 : f32} : (tensor<1xf16>) -> tensor<1xf16> return diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-alloc-tensor-elimination.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-alloc-tensor-elimination.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-alloc-tensor-elimination.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-alloc-tensor-elimination.mlir @@ -94,7 +94,7 @@ // CHECK: func @insertion_point_outside_loop( // CHECK-SAME: %[[t:.*]]: memref, %[[sz:.*]]: index, %[[idx:.*]]: index) func.func @insertion_point_outside_loop(%t : tensor, %sz : index, - %idx : index) -> (tensor) { + %idx : index) -> (tensor) { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index %c5 = arith.constant 5 : index @@ -118,3 +118,21 @@ return %r : tensor } + +// ----- + +// AllocTensorElimination does currently not apply to chains where the type is +// changing. 
This test just ensures that we do not crash or generate IR that +// does not verify. + +// CHECK-LABEL: func @shape_mismatch +func.func @shape_mismatch(%t: tensor<5x6x128xf32>) -> tensor<5x6x128xf32> { + %cst = arith.constant 8.0 : f32 + %0 = bufferization.alloc_tensor() : tensor<128xf32> + %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<128xf32>) -> tensor<128xf32> + %2 = tensor.expand_shape %1 [[0, 1, 2]] + : tensor<128xf32> into tensor<1x1x128xf32> + %3 = tensor.insert_slice %2 into %t[2, 3, 0][1, 1, 128][1, 1, 1] + : tensor<1x1x128xf32> into tensor<5x6x128xf32> + return %3 : tensor<5x6x128xf32> +} diff --git a/mlir/test/Dialect/Math/algebraic-simplification.mlir b/mlir/test/Dialect/Math/algebraic-simplification.mlir --- a/mlir/test/Dialect/Math/algebraic-simplification.mlir +++ b/mlir/test/Dialect/Math/algebraic-simplification.mlir @@ -73,3 +73,183 @@ %1 = math.powf %arg1, %v : vector<4xf32> return %0, %1 : f32, vector<4xf32> } + +// CHECK-LABEL: @ipowi_zero_exp( +// CHECK-SAME: %[[ARG0:.+]]: i32 +// CHECK-SAME: %[[ARG1:.+]]: vector<4xi32> +// CHECK-SAME: -> (i32, vector<4xi32>) { +func.func @ipowi_zero_exp(%arg0: i32, %arg1: vector<4xi32>) -> (i32, vector<4xi32>) { + // CHECK: %[[CST_S:.*]] = arith.constant 1 : i32 + // CHECK: %[[CST_V:.*]] = arith.constant dense<1> : vector<4xi32> + // CHECK: return %[[CST_S]], %[[CST_V]] + %c = arith.constant 0 : i32 + %v = arith.constant dense <0> : vector<4xi32> + %0 = math.ipowi %arg0, %c : i32 + %1 = math.ipowi %arg1, %v : vector<4xi32> + return %0, %1 : i32, vector<4xi32> +} + +// CHECK-LABEL: @ipowi_exp_one( +// CHECK-SAME: %[[ARG0:.+]]: i32 +// CHECK-SAME: %[[ARG1:.+]]: vector<4xi32> +// CHECK-SAME: -> (i32, vector<4xi32>, i32, vector<4xi32>) { +func.func @ipowi_exp_one(%arg0: i32, %arg1: vector<4xi32>) -> (i32, vector<4xi32>, i32, vector<4xi32>) { + // CHECK: %[[CST_S:.*]] = arith.constant 1 : i32 + // CHECK: %[[CST_V:.*]] = arith.constant dense<1> : vector<4xi32> + // CHECK: %[[SCALAR:.*]] = arith.divsi 
%[[CST_S]], %[[ARG0]] + // CHECK: %[[VECTOR:.*]] = arith.divsi %[[CST_V]], %[[ARG1]] + // CHECK: return %[[ARG0]], %[[ARG1]], %[[SCALAR]], %[[VECTOR]] + %c1 = arith.constant 1 : i32 + %v1 = arith.constant dense <1> : vector<4xi32> + %0 = math.ipowi %arg0, %c1 : i32 + %1 = math.ipowi %arg1, %v1 : vector<4xi32> + %cm1 = arith.constant -1 : i32 + %vm1 = arith.constant dense <-1> : vector<4xi32> + %2 = math.ipowi %arg0, %cm1 : i32 + %3 = math.ipowi %arg1, %vm1 : vector<4xi32> + return %0, %1, %2, %3 : i32, vector<4xi32>, i32, vector<4xi32> +} + +// CHECK-LABEL: @ipowi_exp_two( +// CHECK-SAME: %[[ARG0:.+]]: i32 +// CHECK-SAME: %[[ARG1:.+]]: vector<4xi32> +// CHECK-SAME: -> (i32, vector<4xi32>, i32, vector<4xi32>) { +func.func @ipowi_exp_two(%arg0: i32, %arg1: vector<4xi32>) -> (i32, vector<4xi32>, i32, vector<4xi32>) { + // CHECK: %[[CST_S:.*]] = arith.constant 1 : i32 + // CHECK: %[[CST_V:.*]] = arith.constant dense<1> : vector<4xi32> + // CHECK: %[[SCALAR0:.*]] = arith.muli %[[ARG0]], %[[ARG0]] + // CHECK: %[[VECTOR0:.*]] = arith.muli %[[ARG1]], %[[ARG1]] + // CHECK: %[[SCALAR1:.*]] = arith.divsi %[[CST_S]], %[[ARG0]] + // CHECK: %[[SMUL:.*]] = arith.muli %[[SCALAR1]], %[[SCALAR1]] + // CHECK: %[[VECTOR1:.*]] = arith.divsi %[[CST_V]], %[[ARG1]] + // CHECK: %[[VMUL:.*]] = arith.muli %[[VECTOR1]], %[[VECTOR1]] + // CHECK: return %[[SCALAR0]], %[[VECTOR0]], %[[SMUL]], %[[VMUL]] + %c1 = arith.constant 2 : i32 + %v1 = arith.constant dense <2> : vector<4xi32> + %0 = math.ipowi %arg0, %c1 : i32 + %1 = math.ipowi %arg1, %v1 : vector<4xi32> + %cm1 = arith.constant -2 : i32 + %vm1 = arith.constant dense <-2> : vector<4xi32> + %2 = math.ipowi %arg0, %cm1 : i32 + %3 = math.ipowi %arg1, %vm1 : vector<4xi32> + return %0, %1, %2, %3 : i32, vector<4xi32>, i32, vector<4xi32> +} + +// CHECK-LABEL: @ipowi_exp_three( +// CHECK-SAME: %[[ARG0:.+]]: i32 +// CHECK-SAME: %[[ARG1:.+]]: vector<4xi32> +// CHECK-SAME: -> (i32, vector<4xi32>, i32, vector<4xi32>) { +func.func 
@ipowi_exp_three(%arg0: i32, %arg1: vector<4xi32>) -> (i32, vector<4xi32>, i32, vector<4xi32>) { + // CHECK: %[[CST_S:.*]] = arith.constant 1 : i32 + // CHECK: %[[CST_V:.*]] = arith.constant dense<1> : vector<4xi32> + // CHECK: %[[SMUL0:.*]] = arith.muli %[[ARG0]], %[[ARG0]] + // CHECK: %[[SCALAR0:.*]] = arith.muli %[[SMUL0]], %[[ARG0]] + // CHECK: %[[VMUL0:.*]] = arith.muli %[[ARG1]], %[[ARG1]] + // CHECK: %[[VECTOR0:.*]] = arith.muli %[[VMUL0]], %[[ARG1]] + // CHECK: %[[SCALAR1:.*]] = arith.divsi %[[CST_S]], %[[ARG0]] + // CHECK: %[[SMUL1:.*]] = arith.muli %[[SCALAR1]], %[[SCALAR1]] + // CHECK: %[[SMUL2:.*]] = arith.muli %[[SMUL1]], %[[SCALAR1]] + // CHECK: %[[VECTOR1:.*]] = arith.divsi %[[CST_V]], %[[ARG1]] + // CHECK: %[[VMUL1:.*]] = arith.muli %[[VECTOR1]], %[[VECTOR1]] + // CHECK: %[[VMUL2:.*]] = arith.muli %[[VMUL1]], %[[VECTOR1]] + // CHECK: return %[[SCALAR0]], %[[VECTOR0]], %[[SMUL2]], %[[VMUL2]] + %c1 = arith.constant 3 : i32 + %v1 = arith.constant dense <3> : vector<4xi32> + %0 = math.ipowi %arg0, %c1 : i32 + %1 = math.ipowi %arg1, %v1 : vector<4xi32> + %cm1 = arith.constant -3 : i32 + %vm1 = arith.constant dense <-3> : vector<4xi32> + %2 = math.ipowi %arg0, %cm1 : i32 + %3 = math.ipowi %arg1, %vm1 : vector<4xi32> + return %0, %1, %2, %3 : i32, vector<4xi32>, i32, vector<4xi32> +} + +// CHECK-LABEL: @fpowi_zero_exp( +// CHECK-SAME: %[[ARG0:.+]]: f32 +// CHECK-SAME: %[[ARG1:.+]]: vector<4xf32> +// CHECK-SAME: -> (f32, vector<4xf32>) { +func.func @fpowi_zero_exp(%arg0: f32, %arg1: vector<4xf32>) -> (f32, vector<4xf32>) { + // CHECK: %[[CST_S:.*]] = arith.constant 1.000000e+00 : f32 + // CHECK: %[[CST_V:.*]] = arith.constant dense<1.000000e+00> : vector<4xf32> + // CHECK: return %[[CST_S]], %[[CST_V]] + %c = arith.constant 0 : i32 + %v = arith.constant dense <0> : vector<4xi32> + %0 = math.fpowi %arg0, %c : f32, i32 + %1 = math.fpowi %arg1, %v : vector<4xf32>, vector<4xi32> + return %0, %1 : f32, vector<4xf32> +} + +// CHECK-LABEL: @fpowi_exp_one( +// 
CHECK-SAME: %[[ARG0:.+]]: f32 +// CHECK-SAME: %[[ARG1:.+]]: vector<4xf32> +// CHECK-SAME: -> (f32, vector<4xf32>, f32, vector<4xf32>) { +func.func @fpowi_exp_one(%arg0: f32, %arg1: vector<4xf32>) -> (f32, vector<4xf32>, f32, vector<4xf32>) { + // CHECK: %[[CST_S:.*]] = arith.constant 1.000000e+00 : f32 + // CHECK: %[[CST_V:.*]] = arith.constant dense<1.000000e+00> : vector<4xf32> + // CHECK: %[[SCALAR:.*]] = arith.divf %[[CST_S]], %[[ARG0]] + // CHECK: %[[VECTOR:.*]] = arith.divf %[[CST_V]], %[[ARG1]] + // CHECK: return %[[ARG0]], %[[ARG1]], %[[SCALAR]], %[[VECTOR]] + %c1 = arith.constant 1 : i32 + %v1 = arith.constant dense <1> : vector<4xi32> + %0 = math.fpowi %arg0, %c1 : f32, i32 + %1 = math.fpowi %arg1, %v1 : vector<4xf32>, vector<4xi32> + %cm1 = arith.constant -1 : i32 + %vm1 = arith.constant dense <-1> : vector<4xi32> + %2 = math.fpowi %arg0, %cm1 : f32, i32 + %3 = math.fpowi %arg1, %vm1 : vector<4xf32>, vector<4xi32> + return %0, %1, %2, %3 : f32, vector<4xf32>, f32, vector<4xf32> +} + +// CHECK-LABEL: @fpowi_exp_two( +// CHECK-SAME: %[[ARG0:.+]]: f32 +// CHECK-SAME: %[[ARG1:.+]]: vector<4xf32> +// CHECK-SAME: -> (f32, vector<4xf32>, f32, vector<4xf32>) { +func.func @fpowi_exp_two(%arg0: f32, %arg1: vector<4xf32>) -> (f32, vector<4xf32>, f32, vector<4xf32>) { + // CHECK: %[[CST_S:.*]] = arith.constant 1.000000e+00 : f32 + // CHECK: %[[CST_V:.*]] = arith.constant dense<1.000000e+00> : vector<4xf32> + // CHECK: %[[SCALAR0:.*]] = arith.mulf %[[ARG0]], %[[ARG0]] + // CHECK: %[[VECTOR0:.*]] = arith.mulf %[[ARG1]], %[[ARG1]] + // CHECK: %[[SCALAR1:.*]] = arith.divf %[[CST_S]], %[[ARG0]] + // CHECK: %[[SMUL:.*]] = arith.mulf %[[SCALAR1]], %[[SCALAR1]] + // CHECK: %[[VECTOR1:.*]] = arith.divf %[[CST_V]], %[[ARG1]] + // CHECK: %[[VMUL:.*]] = arith.mulf %[[VECTOR1]], %[[VECTOR1]] + // CHECK: return %[[SCALAR0]], %[[VECTOR0]], %[[SMUL]], %[[VMUL]] + %c1 = arith.constant 2 : i32 + %v1 = arith.constant dense <2> : vector<4xi32> + %0 = math.fpowi %arg0, %c1 : f32, i32 + 
%1 = math.fpowi %arg1, %v1 : vector<4xf32>, vector<4xi32> + %cm1 = arith.constant -2 : i32 + %vm1 = arith.constant dense <-2> : vector<4xi32> + %2 = math.fpowi %arg0, %cm1 : f32, i32 + %3 = math.fpowi %arg1, %vm1 : vector<4xf32>, vector<4xi32> + return %0, %1, %2, %3 : f32, vector<4xf32>, f32, vector<4xf32> +} + +// CHECK-LABEL: @fpowi_exp_three( +// CHECK-SAME: %[[ARG0:.+]]: f32 +// CHECK-SAME: %[[ARG1:.+]]: vector<4xf32> +// CHECK-SAME: -> (f32, vector<4xf32>, f32, vector<4xf32>) { +func.func @fpowi_exp_three(%arg0: f32, %arg1: vector<4xf32>) -> (f32, vector<4xf32>, f32, vector<4xf32>) { + // CHECK: %[[CST_S:.*]] = arith.constant 1.000000e+00 : f32 + // CHECK: %[[CST_V:.*]] = arith.constant dense<1.000000e+00> : vector<4xf32> + // CHECK: %[[SMUL0:.*]] = arith.mulf %[[ARG0]], %[[ARG0]] + // CHECK: %[[SCALAR0:.*]] = arith.mulf %[[SMUL0]], %[[ARG0]] + // CHECK: %[[VMUL0:.*]] = arith.mulf %[[ARG1]], %[[ARG1]] + // CHECK: %[[VECTOR0:.*]] = arith.mulf %[[VMUL0]], %[[ARG1]] + // CHECK: %[[SCALAR1:.*]] = arith.divf %[[CST_S]], %[[ARG0]] + // CHECK: %[[SMUL1:.*]] = arith.mulf %[[SCALAR1]], %[[SCALAR1]] + // CHECK: %[[SMUL2:.*]] = arith.mulf %[[SMUL1]], %[[SCALAR1]] + // CHECK: %[[VECTOR1:.*]] = arith.divf %[[CST_V]], %[[ARG1]] + // CHECK: %[[VMUL1:.*]] = arith.mulf %[[VECTOR1]], %[[VECTOR1]] + // CHECK: %[[VMUL2:.*]] = arith.mulf %[[VMUL1]], %[[VECTOR1]] + // CHECK: return %[[SCALAR0]], %[[VECTOR0]], %[[SMUL2]], %[[VMUL2]] + %c1 = arith.constant 3 : i32 + %v1 = arith.constant dense <3> : vector<4xi32> + %0 = math.fpowi %arg0, %c1 : f32, i32 + %1 = math.fpowi %arg1, %v1 : vector<4xf32>, vector<4xi32> + %cm1 = arith.constant -3 : i32 + %vm1 = arith.constant dense <-3> : vector<4xi32> + %2 = math.fpowi %arg0, %cm1 : f32, i32 + %3 = math.fpowi %arg1, %vm1 : vector<4xf32>, vector<4xi32> + return %0, %1, %2, %3 : f32, vector<4xf32>, f32, vector<4xf32> +} diff --git a/mlir/test/Dialect/Math/canonicalize_ipowi.mlir b/mlir/test/Dialect/Math/canonicalize_ipowi.mlir new file mode 
100644 --- /dev/null +++ b/mlir/test/Dialect/Math/canonicalize_ipowi.mlir @@ -0,0 +1,442 @@ +// RUN: mlir-opt %s -canonicalize | FileCheck %s + +// CHECK-LABEL: @ipowi32_fold( +// CHECK-SAME: %[[result:.+]]: memref +func.func @ipowi32_fold(%result : memref) { +// CHECK-DAG: %[[cst0:.+]] = arith.constant 0 : i32 +// CHECK-DAG: %[[cst1:.+]] = arith.constant 1 : i32 +// CHECK-DAG: %[[cst1073741824:.+]] = arith.constant 1073741824 : i32 +// CHECK-DAG: %[[cst_m1:.+]] = arith.constant -1 : i32 +// CHECK-DAG: %[[cst_m27:.+]] = arith.constant -27 : i32 +// CHECK-DAG: %[[i0:.+]] = arith.constant 0 : index +// CHECK-DAG: %[[i1:.+]] = arith.constant 1 : index +// CHECK-DAG: %[[i2:.+]] = arith.constant 2 : index +// CHECK-DAG: %[[i3:.+]] = arith.constant 3 : index +// CHECK-DAG: %[[i4:.+]] = arith.constant 4 : index +// CHECK-DAG: %[[i5:.+]] = arith.constant 5 : index +// CHECK-DAG: %[[i6:.+]] = arith.constant 6 : index +// CHECK-DAG: %[[i7:.+]] = arith.constant 7 : index +// CHECK-DAG: %[[i8:.+]] = arith.constant 8 : index +// CHECK-DAG: %[[i9:.+]] = arith.constant 9 : index +// CHECK-DAG: %[[i10:.+]] = arith.constant 10 : index +// CHECK-DAG: %[[i11:.+]] = arith.constant 11 : index + +// --- Test power == 0 --- + %arg0_base = arith.constant 0 : i32 + %arg0_power = arith.constant 0 : i32 + %res0 = math.ipowi %arg0_base, %arg0_power : i32 + %i0 = arith.constant 0 : index + memref.store %res0, %result[%i0] : memref +// CHECK: memref.store %[[cst1]], %[[result]][%[[i0]]] : memref + + %arg1_base = arith.constant 10 : i32 + %arg1_power = arith.constant 0 : i32 + %res1 = math.ipowi %arg1_base, %arg1_power : i32 + %i1 = arith.constant 1 : index + memref.store %res1, %result[%i1] : memref +// CHECK: memref.store %[[cst1]], %[[result]][%[[i1]]] : memref + + %arg2_base = arith.constant -10 : i32 + %arg2_power = arith.constant 0 : i32 + %res2 = math.ipowi %arg2_base, %arg2_power : i32 + %i2 = arith.constant 2 : index + memref.store %res2, %result[%i2] : memref +// CHECK: memref.store 
%[[cst1]], %[[result]][%[[i2]]] : memref + +// --- Test negative powers --- + %arg3_base = arith.constant 0 : i32 + %arg3_power = arith.constant -1 : i32 + %res3 = math.ipowi %arg3_base, %arg3_power : i32 + %i3 = arith.constant 3 : index + memref.store %res3, %result[%i3] : memref +// No folding for ipowi(0, x) for x < 0: +// CHECK: %[[res3:.+]] = math.ipowi %[[cst0]], %[[cst_m1]] : i32 +// CHECK: memref.store %[[res3]], %[[result]][%[[i3]]] : memref + + %arg4_base = arith.constant 1 : i32 + %arg4_power = arith.constant -10 : i32 + %res4 = math.ipowi %arg4_base, %arg4_power : i32 + %i4 = arith.constant 4 : index + memref.store %res4, %result[%i4] : memref +// CHECK: memref.store %[[cst1]], %[[result]][%[[i4]]] : memref + + %arg5_base = arith.constant 2 : i32 + %arg5_power = arith.constant -1 : i32 + %res5 = math.ipowi %arg5_base, %arg5_power : i32 + %i5 = arith.constant 5 : index + memref.store %res5, %result[%i5] : memref +// CHECK: memref.store %[[cst0]], %[[result]][%[[i5]]] : memref + + %arg6_base = arith.constant -2 : i32 + %arg6_power = arith.constant -1 : i32 + %res6 = math.ipowi %arg6_base, %arg6_power : i32 + %i6 = arith.constant 6 : index + memref.store %res6, %result[%i6] : memref +// CHECK: memref.store %[[cst0]], %[[result]][%[[i6]]] : memref + + %arg7_base = arith.constant -1 : i32 + %arg7_power = arith.constant -10 : i32 + %res7 = math.ipowi %arg7_base, %arg7_power : i32 + %i7 = arith.constant 7 : index + memref.store %res7, %result[%i7] : memref +// CHECK: memref.store %[[cst1]], %[[result]][%[[i7]]] : memref + + %arg8_base = arith.constant -1 : i32 + %arg8_power = arith.constant -11 : i32 + %res8 = math.ipowi %arg8_base, %arg8_power : i32 + %i8 = arith.constant 8 : index + memref.store %res8, %result[%i8] : memref +// CHECK: memref.store %[[cst_m1]], %[[result]][%[[i8]]] : memref + +// --- Test positive powers --- + %arg9_base = arith.constant -3 : i32 + %arg9_power = arith.constant 3 : i32 + %res9 = math.ipowi %arg9_base, %arg9_power : i32 + %i9 = 
arith.constant 9 : index + memref.store %res9, %result[%i9] : memref +// CHECK: memref.store %[[cst_m27]], %[[result]][%[[i9]]] : memref + + %arg10_base = arith.constant 2 : i32 + %arg10_power = arith.constant 30 : i32 + %res10 = math.ipowi %arg10_base, %arg10_power : i32 + %i10 = arith.constant 10 : index + memref.store %res10, %result[%i10] : memref +// CHECK: memref.store %[[cst1073741824]], %[[result]][%[[i10]]] : memref + +// --- Test vector folding --- + %arg11_base = arith.constant 2 : i32 + %arg11_base_vec = vector.splat %arg11_base : vector<2x2xi32> + %arg11_power = arith.constant 30 : i32 + %arg11_power_vec = vector.splat %arg11_power : vector<2x2xi32> + %res11_vec = math.ipowi %arg11_base_vec, %arg11_power_vec : vector<2x2xi32> + %i11 = arith.constant 11 : index + %res11 = vector.extract %res11_vec[1, 1] : vector<2x2xi32> + memref.store %res11, %result[%i11] : memref +// CHECK: memref.store %[[cst1073741824]], %[[result]][%[[i11]]] : memref + + return +} + +// CHECK-LABEL: @ipowi64_fold( +// CHECK-SAME: %[[result:.+]]: memref +func.func @ipowi64_fold(%result : memref) { +// CHECK-DAG: %[[cst0:.+]] = arith.constant 0 : i64 +// CHECK-DAG: %[[cst1:.+]] = arith.constant 1 : i64 +// CHECK-DAG: %[[cst1073741824:.+]] = arith.constant 1073741824 : i64 +// CHECK-DAG: %[[cst281474976710656:.+]] = arith.constant 281474976710656 : i64 +// CHECK-DAG: %[[cst_m1:.+]] = arith.constant -1 : i64 +// CHECK-DAG: %[[cst_m27:.+]] = arith.constant -27 : i64 +// CHECK-DAG: %[[i0:.+]] = arith.constant 0 : index +// CHECK-DAG: %[[i1:.+]] = arith.constant 1 : index +// CHECK-DAG: %[[i2:.+]] = arith.constant 2 : index +// CHECK-DAG: %[[i3:.+]] = arith.constant 3 : index +// CHECK-DAG: %[[i4:.+]] = arith.constant 4 : index +// CHECK-DAG: %[[i5:.+]] = arith.constant 5 : index +// CHECK-DAG: %[[i6:.+]] = arith.constant 6 : index +// CHECK-DAG: %[[i7:.+]] = arith.constant 7 : index +// CHECK-DAG: %[[i8:.+]] = arith.constant 8 : index +// CHECK-DAG: %[[i9:.+]] = arith.constant 9 : index 
+// CHECK-DAG: %[[i10:.+]] = arith.constant 10 : index +// CHECK-DAG: %[[i11:.+]] = arith.constant 11 : index + +// --- Test power == 0 --- + %arg0_base = arith.constant 0 : i64 + %arg0_power = arith.constant 0 : i64 + %res0 = math.ipowi %arg0_base, %arg0_power : i64 + %i0 = arith.constant 0 : index + memref.store %res0, %result[%i0] : memref +// CHECK: memref.store %[[cst1]], %[[result]][%[[i0]]] : memref + + %arg1_base = arith.constant 10 : i64 + %arg1_power = arith.constant 0 : i64 + %res1 = math.ipowi %arg1_base, %arg1_power : i64 + %i1 = arith.constant 1 : index + memref.store %res1, %result[%i1] : memref +// CHECK: memref.store %[[cst1]], %[[result]][%[[i1]]] : memref + + %arg2_base = arith.constant -10 : i64 + %arg2_power = arith.constant 0 : i64 + %res2 = math.ipowi %arg2_base, %arg2_power : i64 + %i2 = arith.constant 2 : index + memref.store %res2, %result[%i2] : memref +// CHECK: memref.store %[[cst1]], %[[result]][%[[i2]]] : memref + +// --- Test negative powers --- + %arg3_base = arith.constant 0 : i64 + %arg3_power = arith.constant -1 : i64 + %res3 = math.ipowi %arg3_base, %arg3_power : i64 + %i3 = arith.constant 3 : index + memref.store %res3, %result[%i3] : memref +// No folding for ipowi(0, x) for x < 0: +// CHECK: %[[res3:.+]] = math.ipowi %[[cst0]], %[[cst_m1]] : i64 +// CHECK: memref.store %[[res3]], %[[result]][%[[i3]]] : memref + + %arg4_base = arith.constant 1 : i64 + %arg4_power = arith.constant -10 : i64 + %res4 = math.ipowi %arg4_base, %arg4_power : i64 + %i4 = arith.constant 4 : index + memref.store %res4, %result[%i4] : memref +// CHECK: memref.store %[[cst1]], %[[result]][%[[i4]]] : memref + + %arg5_base = arith.constant 2 : i64 + %arg5_power = arith.constant -1 : i64 + %res5 = math.ipowi %arg5_base, %arg5_power : i64 + %i5 = arith.constant 5 : index + memref.store %res5, %result[%i5] : memref +// CHECK: memref.store %[[cst0]], %[[result]][%[[i5]]] : memref + + %arg6_base = arith.constant -2 : i64 + %arg6_power = arith.constant -1 : i64 
+ %res6 = math.ipowi %arg6_base, %arg6_power : i64 + %i6 = arith.constant 6 : index + memref.store %res6, %result[%i6] : memref +// CHECK: memref.store %[[cst0]], %[[result]][%[[i6]]] : memref + + %arg7_base = arith.constant -1 : i64 + %arg7_power = arith.constant -10 : i64 + %res7 = math.ipowi %arg7_base, %arg7_power : i64 + %i7 = arith.constant 7 : index + memref.store %res7, %result[%i7] : memref +// CHECK: memref.store %[[cst1]], %[[result]][%[[i7]]] : memref + + %arg8_base = arith.constant -1 : i64 + %arg8_power = arith.constant -11 : i64 + %res8 = math.ipowi %arg8_base, %arg8_power : i64 + %i8 = arith.constant 8 : index + memref.store %res8, %result[%i8] : memref +// CHECK: memref.store %[[cst_m1]], %[[result]][%[[i8]]] : memref + +// --- Test positive powers --- + %arg9_base = arith.constant -3 : i64 + %arg9_power = arith.constant 3 : i64 + %res9 = math.ipowi %arg9_base, %arg9_power : i64 + %i9 = arith.constant 9 : index + memref.store %res9, %result[%i9] : memref +// CHECK: memref.store %[[cst_m27]], %[[result]][%[[i9]]] : memref + + %arg10_base = arith.constant 2 : i64 + %arg10_power = arith.constant 30 : i64 + %res10 = math.ipowi %arg10_base, %arg10_power : i64 + %i10 = arith.constant 10 : index + memref.store %res10, %result[%i10] : memref +// CHECK: memref.store %[[cst1073741824]], %[[result]][%[[i10]]] : memref + + %arg11_base = arith.constant 2 : i64 + %arg11_power = arith.constant 48 : i64 + %res11 = math.ipowi %arg11_base, %arg11_power : i64 + %i11 = arith.constant 11 : index + memref.store %res11, %result[%i11] : memref +// CHECK: memref.store %[[cst281474976710656]], %[[result]][%[[i11]]] : memref + + return +} + +// CHECK-LABEL: @ipowi16_fold( +// CHECK-SAME: %[[result:.+]]: memref +func.func @ipowi16_fold(%result : memref) { +// CHECK-DAG: %[[cst0:.+]] = arith.constant 0 : i16 +// CHECK-DAG: %[[cst1:.+]] = arith.constant 1 : i16 +// CHECK-DAG: %[[cst16384:.+]] = arith.constant 16384 : i16 +// CHECK-DAG: %[[cst_m1:.+]] = arith.constant -1 : i16 
+// CHECK-DAG: %[[cst_m27:.+]] = arith.constant -27 : i16 +// CHECK-DAG: %[[i0:.+]] = arith.constant 0 : index +// CHECK-DAG: %[[i1:.+]] = arith.constant 1 : index +// CHECK-DAG: %[[i2:.+]] = arith.constant 2 : index +// CHECK-DAG: %[[i3:.+]] = arith.constant 3 : index +// CHECK-DAG: %[[i4:.+]] = arith.constant 4 : index +// CHECK-DAG: %[[i5:.+]] = arith.constant 5 : index +// CHECK-DAG: %[[i6:.+]] = arith.constant 6 : index +// CHECK-DAG: %[[i7:.+]] = arith.constant 7 : index +// CHECK-DAG: %[[i8:.+]] = arith.constant 8 : index +// CHECK-DAG: %[[i9:.+]] = arith.constant 9 : index +// CHECK-DAG: %[[i10:.+]] = arith.constant 10 : index + +// --- Test power == 0 --- + %arg0_base = arith.constant 0 : i16 + %arg0_power = arith.constant 0 : i16 + %res0 = math.ipowi %arg0_base, %arg0_power : i16 + %i0 = arith.constant 0 : index + memref.store %res0, %result[%i0] : memref +// CHECK: memref.store %[[cst1]], %[[result]][%[[i0]]] : memref + + %arg1_base = arith.constant 10 : i16 + %arg1_power = arith.constant 0 : i16 + %res1 = math.ipowi %arg1_base, %arg1_power : i16 + %i1 = arith.constant 1 : index + memref.store %res1, %result[%i1] : memref +// CHECK: memref.store %[[cst1]], %[[result]][%[[i1]]] : memref + + %arg2_base = arith.constant -10 : i16 + %arg2_power = arith.constant 0 : i16 + %res2 = math.ipowi %arg2_base, %arg2_power : i16 + %i2 = arith.constant 2 : index + memref.store %res2, %result[%i2] : memref +// CHECK: memref.store %[[cst1]], %[[result]][%[[i2]]] : memref + +// --- Test negative powers --- + %arg3_base = arith.constant 0 : i16 + %arg3_power = arith.constant -1 : i16 + %res3 = math.ipowi %arg3_base, %arg3_power : i16 + %i3 = arith.constant 3 : index + memref.store %res3, %result[%i3] : memref +// No folding for ipowi(0, x) for x < 0: +// CHECK: %[[res3:.+]] = math.ipowi %[[cst0]], %[[cst_m1]] : i16 +// CHECK: memref.store %[[res3]], %[[result]][%[[i3]]] : memref + + %arg4_base = arith.constant 1 : i16 + %arg4_power = arith.constant -10 : i16 + %res4 = 
math.ipowi %arg4_base, %arg4_power : i16 + %i4 = arith.constant 4 : index + memref.store %res4, %result[%i4] : memref +// CHECK: memref.store %[[cst1]], %[[result]][%[[i4]]] : memref + + %arg5_base = arith.constant 2 : i16 + %arg5_power = arith.constant -1 : i16 + %res5 = math.ipowi %arg5_base, %arg5_power : i16 + %i5 = arith.constant 5 : index + memref.store %res5, %result[%i5] : memref +// CHECK: memref.store %[[cst0]], %[[result]][%[[i5]]] : memref + + %arg6_base = arith.constant -2 : i16 + %arg6_power = arith.constant -1 : i16 + %res6 = math.ipowi %arg6_base, %arg6_power : i16 + %i6 = arith.constant 6 : index + memref.store %res6, %result[%i6] : memref +// CHECK: memref.store %[[cst0]], %[[result]][%[[i6]]] : memref + + %arg7_base = arith.constant -1 : i16 + %arg7_power = arith.constant -10 : i16 + %res7 = math.ipowi %arg7_base, %arg7_power : i16 + %i7 = arith.constant 7 : index + memref.store %res7, %result[%i7] : memref +// CHECK: memref.store %[[cst1]], %[[result]][%[[i7]]] : memref + + %arg8_base = arith.constant -1 : i16 + %arg8_power = arith.constant -11 : i16 + %res8 = math.ipowi %arg8_base, %arg8_power : i16 + %i8 = arith.constant 8 : index + memref.store %res8, %result[%i8] : memref +// CHECK: memref.store %[[cst_m1]], %[[result]][%[[i8]]] : memref + +// --- Test positive powers --- + %arg9_base = arith.constant -3 : i16 + %arg9_power = arith.constant 3 : i16 + %res9 = math.ipowi %arg9_base, %arg9_power : i16 + %i9 = arith.constant 9 : index + memref.store %res9, %result[%i9] : memref +// CHECK: memref.store %[[cst_m27]], %[[result]][%[[i9]]] : memref + + %arg10_base = arith.constant 2 : i16 + %arg10_power = arith.constant 14 : i16 + %res10 = math.ipowi %arg10_base, %arg10_power : i16 + %i10 = arith.constant 10 : index + memref.store %res10, %result[%i10] : memref +// CHECK: memref.store %[[cst16384]], %[[result]][%[[i10]]] : memref + + return +} + +// CHECK-LABEL: @ipowi8_fold( +// CHECK-SAME: %[[result:.+]]: memref +func.func @ipowi8_fold(%result : 
memref) { +// CHECK-DAG: %[[cst0:.+]] = arith.constant 0 : i8 +// CHECK-DAG: %[[cst1:.+]] = arith.constant 1 : i8 +// CHECK-DAG: %[[cst64:.+]] = arith.constant 64 : i8 +// CHECK-DAG: %[[cst_m1:.+]] = arith.constant -1 : i8 +// CHECK-DAG: %[[cst_m27:.+]] = arith.constant -27 : i8 +// CHECK-DAG: %[[i0:.+]] = arith.constant 0 : index +// CHECK-DAG: %[[i1:.+]] = arith.constant 1 : index +// CHECK-DAG: %[[i2:.+]] = arith.constant 2 : index +// CHECK-DAG: %[[i3:.+]] = arith.constant 3 : index +// CHECK-DAG: %[[i4:.+]] = arith.constant 4 : index +// CHECK-DAG: %[[i5:.+]] = arith.constant 5 : index +// CHECK-DAG: %[[i6:.+]] = arith.constant 6 : index +// CHECK-DAG: %[[i7:.+]] = arith.constant 7 : index +// CHECK-DAG: %[[i8:.+]] = arith.constant 8 : index +// CHECK-DAG: %[[i9:.+]] = arith.constant 9 : index +// CHECK-DAG: %[[i10:.+]] = arith.constant 10 : index + +// --- Test power == 0 --- + %arg0_base = arith.constant 0 : i8 + %arg0_power = arith.constant 0 : i8 + %res0 = math.ipowi %arg0_base, %arg0_power : i8 + %i0 = arith.constant 0 : index + memref.store %res0, %result[%i0] : memref +// CHECK: memref.store %[[cst1]], %[[result]][%[[i0]]] : memref + + %arg1_base = arith.constant 10 : i8 + %arg1_power = arith.constant 0 : i8 + %res1 = math.ipowi %arg1_base, %arg1_power : i8 + %i1 = arith.constant 1 : index + memref.store %res1, %result[%i1] : memref +// CHECK: memref.store %[[cst1]], %[[result]][%[[i1]]] : memref + + %arg2_base = arith.constant -10 : i8 + %arg2_power = arith.constant 0 : i8 + %res2 = math.ipowi %arg2_base, %arg2_power : i8 + %i2 = arith.constant 2 : index + memref.store %res2, %result[%i2] : memref +// CHECK: memref.store %[[cst1]], %[[result]][%[[i2]]] : memref + +// --- Test negative powers --- + %arg3_base = arith.constant 0 : i8 + %arg3_power = arith.constant -1 : i8 + %res3 = math.ipowi %arg3_base, %arg3_power : i8 + %i3 = arith.constant 3 : index + memref.store %res3, %result[%i3] : memref +// No folding for ipowi(0, x) for x < 0: +// CHECK: 
%[[res3:.+]] = math.ipowi %[[cst0]], %[[cst_m1]] : i8 +// CHECK: memref.store %[[res3]], %[[result]][%[[i3]]] : memref + + %arg4_base = arith.constant 1 : i8 + %arg4_power = arith.constant -10 : i8 + %res4 = math.ipowi %arg4_base, %arg4_power : i8 + %i4 = arith.constant 4 : index + memref.store %res4, %result[%i4] : memref +// CHECK: memref.store %[[cst1]], %[[result]][%[[i4]]] : memref + + %arg5_base = arith.constant 2 : i8 + %arg5_power = arith.constant -1 : i8 + %res5 = math.ipowi %arg5_base, %arg5_power : i8 + %i5 = arith.constant 5 : index + memref.store %res5, %result[%i5] : memref +// CHECK: memref.store %[[cst0]], %[[result]][%[[i5]]] : memref + + %arg6_base = arith.constant -2 : i8 + %arg6_power = arith.constant -1 : i8 + %res6 = math.ipowi %arg6_base, %arg6_power : i8 + %i6 = arith.constant 6 : index + memref.store %res6, %result[%i6] : memref +// CHECK: memref.store %[[cst0]], %[[result]][%[[i6]]] : memref + + %arg7_base = arith.constant -1 : i8 + %arg7_power = arith.constant -10 : i8 + %res7 = math.ipowi %arg7_base, %arg7_power : i8 + %i7 = arith.constant 7 : index + memref.store %res7, %result[%i7] : memref +// CHECK: memref.store %[[cst1]], %[[result]][%[[i7]]] : memref + + %arg8_base = arith.constant -1 : i8 + %arg8_power = arith.constant -11 : i8 + %res8 = math.ipowi %arg8_base, %arg8_power : i8 + %i8 = arith.constant 8 : index + memref.store %res8, %result[%i8] : memref +// CHECK: memref.store %[[cst_m1]], %[[result]][%[[i8]]] : memref + +// --- Test positive powers --- + %arg9_base = arith.constant -3 : i8 + %arg9_power = arith.constant 3 : i8 + %res9 = math.ipowi %arg9_base, %arg9_power : i8 + %i9 = arith.constant 9 : index + memref.store %res9, %result[%i9] : memref +// CHECK: memref.store %[[cst_m27]], %[[result]][%[[i9]]] : memref + + %arg10_base = arith.constant 2 : i8 + %arg10_power = arith.constant 6 : i8 + %res10 = math.ipowi %arg10_base, %arg10_power : i8 + %i10 = arith.constant 10 : index + memref.store %res10, %result[%i10] : memref +// 
CHECK: memref.store %[[cst64]], %[[result]][%[[i10]]] : memref + + return +} diff --git a/mlir/test/Dialect/Math/ops.mlir b/mlir/test/Dialect/Math/ops.mlir --- a/mlir/test/Dialect/Math/ops.mlir +++ b/mlir/test/Dialect/Math/ops.mlir @@ -158,6 +158,20 @@ return } +// CHECK-LABEL: func @fpowi( +// CHECK-SAME: %[[SB:.*]]: f32, %[[SP:.*]]: i32, +// CHECK-SAME: %[[VB:.*]]: vector<4xf64>, %[[VP:.*]]: vector<4xi16>, +// CHECK-SAME: %[[TB:.*]]: tensor<4x3x?xf16>, %[[TP:.*]]: tensor<4x3x?xi64>) { +func.func @fpowi(%b: f32, %p: i32, %vb: vector<4xf64>, %vp: vector<4xi16>, %tb: tensor<4x3x?xf16>, %tp: tensor<4x3x?xi64>) { +// CHECK: {{.*}} = math.fpowi %[[SB]], %[[SP]] : f32, i32 + %0 = math.fpowi %b, %p : f32, i32 +// CHECK: {{.*}} = math.fpowi %[[VB]], %[[VP]] : vector<4xf64>, vector<4xi16> + %1 = math.fpowi %vb, %vp : vector<4xf64>, vector<4xi16> +// CHECK: {{.*}} = math.fpowi %[[TB]], %[[TP]] : tensor<4x3x?xf16>, tensor<4x3x?xi64> + %2 = math.fpowi %tb, %tp : tensor<4x3x?xf16>, tensor<4x3x?xi64> + return +} + // CHECK-LABEL: func @rsqrt( // CHECK-SAME: %[[F:.*]]: f32, %[[V:.*]]: vector<4xf32>, %[[T:.*]]: tensor<4x4x?xf32>) func.func @rsqrt(%f: f32, %v: vector<4xf32>, %t: tensor<4x4x?xf32>) { diff --git a/mlir/test/Dialect/NVGPU/mma-sync-f32-to-tf32.mlir b/mlir/test/Dialect/NVGPU/mma-sync-f32-to-tf32.mlir --- a/mlir/test/Dialect/NVGPU/mma-sync-f32-to-tf32.mlir +++ b/mlir/test/Dialect/NVGPU/mma-sync-f32-to-tf32.mlir @@ -18,3 +18,12 @@ return %d : vector<2x2xf32> } // ----- + +// Negative test for non f32 case. 
+// CHECK-LABEL: mma_sync_f16 +// CHECK-NOT: tf32Enabled +// CHECK: return +func.func @mma_sync_f16(%arg0: vector<4x2xf16>, %arg1: vector<2x2xf16>, %arg2: vector<2x2xf16>) -> vector<2x2xf16> { + %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [16, 8, 16]} : (vector<4x2xf16>, vector<2x2xf16>, vector<2x2xf16>) -> vector<2x2xf16> + return %d : vector<2x2xf16> +} diff --git a/mlir/test/Dialect/SPIRV/IR/joint-matrix-ops.mlir b/mlir/test/Dialect/SPIRV/IR/joint-matrix-ops.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Dialect/SPIRV/IR/joint-matrix-ops.mlir @@ -0,0 +1,158 @@ +// RUN: mlir-opt -allow-unregistered-dialect -split-input-file -verify-diagnostics %s | FileCheck %s + +// CHECK-LABEL: @joint_matrix_load +spv.func @joint_matrix_load(%ptr : !spv.ptr, %stride : i32) "None" { + // CHECK: {{%.*}} = spv.JointMatrixLoadINTEL {{%.*}}, {{%.*}} : (!spv.ptr, i32) -> !spv.jointmatrix<16x8xi32, RowMajor, Workgroup> + %0 = spv.JointMatrixLoadINTEL %ptr, %stride : (!spv.ptr, i32) -> !spv.jointmatrix<16x8xi32, RowMajor, Workgroup> + spv.Return +} + +// ----- +// CHECK-LABEL: @joint_matrix_load_memaccess +spv.func @joint_matrix_load_memaccess(%ptr : !spv.ptr, %stride : i32) "None" { + // CHECK: {{%.*}} = spv.JointMatrixLoadINTEL {{%.*}}, {{%.*}} {memory_access = #spv.memory_access} : (!spv.ptr, i32) -> !spv.jointmatrix<8x16xi32, ColumnMajor, Subgroup> + %0 = spv.JointMatrixLoadINTEL %ptr, %stride {memory_access = #spv.memory_access} : (!spv.ptr, i32) -> !spv.jointmatrix<8x16xi32, ColumnMajor, Subgroup> + spv.Return +} + +// CHECK-LABEL: @joint_matrix_load_diff_ptr_type +spv.func @joint_matrix_load_diff_ptr_type(%ptr : !spv.ptr, Workgroup>, %stride : i32) "None" { + // CHECK: {{%.*}} = spv.JointMatrixLoadINTEL {{%.*}}, {{%.*}} {memory_access = #spv.memory_access} : (!spv.ptr, Workgroup>, i32) -> !spv.jointmatrix<8x16xi32, RowMajor, Workgroup> + %0 = spv.JointMatrixLoadINTEL %ptr, %stride {memory_access = #spv.memory_access} : (!spv.ptr, Workgroup>, i32) -> 
!spv.jointmatrix<8x16xi32, RowMajor, Workgroup> + spv.Return +} + +// CHECK-LABEL: @joint_matrix_store +spv.func @joint_matrix_store(%ptr : !spv.ptr, %stride : i32, %m : !spv.jointmatrix<8x16xi32, RowMajor, Workgroup>) "None" { + // CHECK: spv.JointMatrixStoreINTEL {{%.*}}, {{%.*}}, {{%.*}} : (!spv.ptr, !spv.jointmatrix<8x16xi32, RowMajor, Workgroup>, i32) + spv.JointMatrixStoreINTEL %ptr, %m, %stride : (!spv.ptr, !spv.jointmatrix<8x16xi32, RowMajor, Workgroup>, i32) + spv.Return +} + +// CHECK-LABEL: @joint_matrix_store_memaccess +spv.func @joint_matrix_store_memaccess(%ptr : !spv.ptr, %m : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup>, %stride : i32) "None" { + // CHECK: spv.JointMatrixStoreINTEL {{%.*}}, {{%.*}}, {{%.*}} {Volatile} : (!spv.ptr, !spv.jointmatrix<8x16xi32, RowMajor, Subgroup>, i32) + spv.JointMatrixStoreINTEL %ptr, %m, %stride {Volatile} : (!spv.ptr, !spv.jointmatrix<8x16xi32, RowMajor, Subgroup>, i32) + spv.Return +} + +// CHECK-LABEL: @joint_matrix_length +spv.func @joint_matrix_length() -> i32 "None" { + // CHECK: {{%.*}} = spv.JointMatrixWorkItemLengthINTEL : !spv.jointmatrix<8x16xi32, PackedB, Subgroup> + %0 = spv.JointMatrixWorkItemLengthINTEL : !spv.jointmatrix<8x16xi32, PackedB, Subgroup> + spv.ReturnValue %0 : i32 +} + +// CHECK-LABEL: @joint_matrix_muladd +spv.func @joint_matrix_muladd(%a : !spv.jointmatrix<8x32xi8, RowMajor, Subgroup>, %b : !spv.jointmatrix<32x8xi8, ColumnMajor, Subgroup>, %c : !spv.jointmatrix<8x8xi32, RowMajor, Subgroup>) "None" { + // CHECK: {{%.*}} = spv.JointMatrixMadINTEL {{%.*}}, {{%.*}}, {{%.*}} : !spv.jointmatrix<8x32xi8, RowMajor, Subgroup>, !spv.jointmatrix<32x8xi8, ColumnMajor, Subgroup> -> !spv.jointmatrix<8x8xi32, RowMajor, Subgroup> + %r = spv.JointMatrixMadINTEL %a, %b, %c : !spv.jointmatrix<8x32xi8, RowMajor, Subgroup>, !spv.jointmatrix<32x8xi8, ColumnMajor, Subgroup> -> !spv.jointmatrix<8x8xi32, RowMajor, Subgroup> + spv.Return +} + +// CHECK-LABEL: @joint_matrix_add +spv.func @joint_matrix_add(%a : 
!spv.jointmatrix<8x16xi32, RowMajor, Subgroup>, %b : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup>) "None" { + // CHECK: {{%.*}} = spv.IAdd {{%.*}}, {{%.*}} : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup> + %r = spv.IAdd %a, %b : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup> + spv.Return +} + +// CHECK-LABEL: @joint_matrix_sub +spv.func @joint_matrix_sub(%a : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup>, %b : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup>) "None" { + // CHECK: {{%.*}} = spv.ISub {{%.*}}, {{%.*}} : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup> + %r = spv.ISub %a, %b : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup> + spv.Return +} + +// CHECK-LABEL: @joint_matrix_sdiv +spv.func @joint_matrix_sdiv(%a : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup>, %b : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup>) "None" { + // CHECK: {{%.*}} = spv.SDiv {{%.*}}, {{%.*}} : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup> + %r = spv.SDiv %a, %b : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup> + spv.Return +} + +// CHECK-LABEL: @joint_matrix_udiv +spv.func @joint_matrix_udiv(%a : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup>, %b : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup>) "None" { + // CHECK: {{%.*}} = spv.UDiv {{%.*}}, {{%.*}} : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup> + %r = spv.UDiv %a, %b : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup> + spv.Return +} + +// CHECK-LABEL: @joint_matrix_fadd +spv.func @joint_matrix_fadd(%a : !spv.jointmatrix<8x16xf32, RowMajor, Subgroup>, %b : !spv.jointmatrix<8x16xf32, RowMajor, Subgroup>) "None" { + // CHECK: {{%.*}} = spv.FAdd {{%.*}}, {{%.*}} : !spv.jointmatrix<8x16xf32, RowMajor, Subgroup> + %r = spv.FAdd %a, %b : !spv.jointmatrix<8x16xf32, RowMajor, Subgroup> + spv.Return +} + +// CHECK-LABEL: @joint_matrix_fsub +spv.func @joint_matrix_fsub(%a : !spv.jointmatrix<8x16xf32, RowMajor, Subgroup>, %b : !spv.jointmatrix<8x16xf32, RowMajor, Subgroup>) "None" { + // CHECK: {{%.*}} = spv.FSub {{%.*}}, {{%.*}} : 
!spv.jointmatrix<8x16xf32, RowMajor, Subgroup> + %r = spv.FSub %a, %b : !spv.jointmatrix<8x16xf32, RowMajor, Subgroup> + spv.Return +} + +// CHECK-LABEL: @joint_matrix_fdiv +spv.func @joint_matrix_fdiv(%a : !spv.jointmatrix<8x16xf32, RowMajor, Subgroup>, %b : !spv.jointmatrix<8x16xf32, RowMajor, Subgroup>) "None" { + // CHECK: {{%.*}} = spv.FDiv {{%.*}}, {{%.*}} : !spv.jointmatrix<8x16xf32, RowMajor, Subgroup> + %r = spv.FDiv %a, %b : !spv.jointmatrix<8x16xf32, RowMajor, Subgroup> + spv.Return +} + +// ----- + +// CHECK-LABEL: @joint_matrix_access_chain +spv.func @joint_matrix_access_chain(%a : !spv.ptr, Function>) -> !spv.ptr "None" { + %0 = spv.Constant 0: i32 + // CHECK: {{%.*}} = spv.AccessChain {{%.*}}[{{%.*}}] : !spv.ptr, Function>, i32 + %1 = spv.AccessChain %a[%0] : !spv.ptr, Function>, i32 + spv.ReturnValue %1 : !spv.ptr +} + +// ----- + +spv.func @joint_matrix_muladd(%a : !spv.jointmatrix<16x16xi32, RowMajor, Subgroup>, %b : !spv.jointmatrix<16x8xi32, RowMajor, Subgroup>, %c : !spv.jointmatrix<8x8xi32, RowMajor, Subgroup>) "None" { + // expected-error @+1 {{'spv.JointMatrixMadINTEL' op matrix size must match}} + %r = spv.JointMatrixMadINTEL %a, %b, %c : !spv.jointmatrix<16x16xi32, RowMajor, Subgroup>, !spv.jointmatrix<16x8xi32, RowMajor, Subgroup> -> !spv.jointmatrix<8x8xi32, RowMajor, Subgroup> + spv.Return +} + +// ----- + +spv.func @joint_matrix_muladd(%a : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup>, %b : !spv.jointmatrix<8x8xi32, RowMajor, Subgroup>, %c : !spv.jointmatrix<8x8xi32, RowMajor, Subgroup>) "None" { + // expected-error @+1 {{'spv.JointMatrixMadINTEL' op matrix size must match}} + %r = spv.JointMatrixMadINTEL %a, %b, %c : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup>, !spv.jointmatrix<8x8xi32, RowMajor, Subgroup> -> !spv.jointmatrix<8x8xi32, RowMajor, Subgroup> + spv.Return +} + +// ----- + +spv.func @joint_matrix_muladd(%a : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup>, %b : !spv.jointmatrix<16x8xi32, RowMajor, Workgroup>, %c : 
!spv.jointmatrix<8x8xi32, RowMajor, Subgroup>) "None" { + // expected-error @+1 {{'spv.JointMatrixMadINTEL' op matrix scope must match}} + %r = spv.JointMatrixMadINTEL %a, %b, %c : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup>, !spv.jointmatrix<16x8xi32, RowMajor, Workgroup> -> !spv.jointmatrix<8x8xi32, RowMajor, Subgroup> + spv.Return +} + +// ----- + +spv.func @joint_matrix_muladd(%a : !spv.jointmatrix<8x16xf32, RowMajor, Subgroup>, %b : !spv.jointmatrix<16x8xi32, RowMajor, Subgroup>, %c : !spv.jointmatrix<8x8xi32, RowMajor, Subgroup>) "None" { + // expected-error @+1 {{matrix element type must match}} + %r = spv.JointMatrixMadINTEL %a, %b, %c : !spv.jointmatrix<8x16xf32, RowMajor, Subgroup>, !spv.jointmatrix<16x8xi32, RowMajor, Subgroup> -> !spv.jointmatrix<8x8xi32, RowMajor, Subgroup> + spv.Return +} + +// ----- + +spv.func @joint_matrix_load_memaccess(%ptr : !spv.ptr, Workgroup>, %stride : i32) "None" { + // expected-error @+1 {{Pointer must point to a scalar or vector type}} + %0 = spv.JointMatrixLoadINTEL %ptr, %stride : (!spv.ptr, Workgroup>, i32)-> !spv.jointmatrix<8x16xi32, RowMajor, Subgroup> + spv.Return +} + +// ----- + +spv.func @joint_matrix_load_memaccess(%ptr : !spv.ptr, %stride : i32) "None" { + // expected-error @+1 {{Pointer storage class must be Workgroup or CrossWorkgroup}} + %0 = spv.JointMatrixLoadINTEL %ptr, %stride : (!spv.ptr, i32) -> !spv.jointmatrix<8x16xi32, RowMajor, Subgroup> + spv.Return +} diff --git a/mlir/test/Dialect/SparseTensor/sparse_concat.mlir b/mlir/test/Dialect/SparseTensor/sparse_concat.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Dialect/SparseTensor/sparse_concat.mlir @@ -0,0 +1,360 @@ +// RUN: mlir-opt %s --sparse-tensor-conversion --canonicalize --cse | FileCheck %s + +#SparseMatrix = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}> + +#SparseMatrix_P = #sparse_tensor.encoding<{ + dimLevelType = [ "compressed", "compressed" ], + dimOrdering = affine_map<(i,j) -> (j,i)> +}> + +// 
CHECK-LABEL: func.func @concat_mix_dense( +// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<2x4xf64>, +// CHECK-SAME: %[[TMP_arg1:.*]]: !llvm.ptr) +// CHECK-DAG: %[[TMP_c2:.*]] = arith.constant 2 : index +// CHECK-DAG: %[[TMP_c6_i32:.*]] = arith.constant 6 : i32 +// CHECK-DAG: %[[TMP_c1_i32:.*]] = arith.constant 1 : i32 +// CHECK-DAG: %[[TMP_c0_i32:.*]] = arith.constant 0 : i32 +// CHECK-DAG: %[[TMP_c1_i8:.*]] = arith.constant 1 : i8 +// CHECK-DAG: %[[TMP_c3:.*]] = arith.constant 3 : index +// CHECK-DAG: %[[TMP_c1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[TMP_cst:.*]] = arith.constant 0.000000e+00 : f64 +// CHECK-DAG: %[[TMP_c0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[TMP_c4:.*]] = arith.constant 4 : index +// CHECK: %[[TMP_0:.*]] = memref.alloc() : memref<5x4xf64> +// CHECK: linalg.fill ins(%[[TMP_cst]] : f64) outs(%[[TMP_0]] : memref<5x4xf64>) +// CHECK: scf.for %[[TMP_arg2:.*]] = %[[TMP_c0]] to %[[TMP_c2]] step %[[TMP_c1]] { +// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_c0]] to %[[TMP_c4]] step %[[TMP_c1]] { +// CHECK: %[[TMP_12:.*]] = tensor.extract %[[TMP_arg0]][%[[TMP_arg2]], %[[TMP_arg3]]] : tensor<2x4xf64> +// CHECK: %[[TMP_13:.*]] = arith.cmpf une, %[[TMP_12]], %[[TMP_cst]] : f64 +// CHECK: scf.if %[[TMP_13]] { +// CHECK: memref.store %[[TMP_12]], %[[TMP_0]][%[[TMP_arg2]], %[[TMP_arg3]]] : memref<5x4xf64> +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: %[[TMP_1:.*]] = memref.alloca() : memref<2xi8> +// CHECK: %[[TMP_2:.*]] = memref.cast %[[TMP_1]] : memref<2xi8> to memref +// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_1]][%[[TMP_c0]]] : memref<2xi8> +// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_1]][%[[TMP_c1]]] : memref<2xi8> +// CHECK: %[[TMP_3:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_4:.*]] = memref.cast %[[TMP_3]] : memref<2xindex> to memref +// CHECK: memref.store %[[TMP_c3]], %[[TMP_3]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: memref.store %[[TMP_c4]], %[[TMP_3]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_5:.*]] = 
memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_6:.*]] = memref.cast %[[TMP_5]] : memref<2xindex> to memref +// CHECK: memref.store %[[TMP_c0]], %[[TMP_5]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: memref.store %[[TMP_c1]], %[[TMP_5]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_7:.*]] = call @newSparseTensor(%[[TMP_2]], %[[TMP_4]], %[[TMP_6]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c6_i32]], %[[TMP_arg1]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK: %[[TMP_8:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_9:.*]] = memref.cast %[[TMP_8]] : memref<2xindex> to memref +// CHECK: %[[TMP_10:.*]] = memref.alloca() : memref +// CHECK: scf.while : () -> () { +// CHECK: %[[TMP_12:.*]] = func.call @getNextF64(%[[TMP_7]], %[[TMP_9]], %[[TMP_10]]) : (!llvm.ptr, memref, memref) -> i1 +// CHECK: scf.condition(%[[TMP_12]]) +// CHECK: } do { +// CHECK: %[[TMP_12:.*]] = memref.load %[[TMP_8]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: %[[TMP_13:.*]] = arith.addi %[[TMP_12]], %[[TMP_c2]] : index +// CHECK: %[[TMP_14:.*]] = memref.load %[[TMP_8]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_15:.*]] = memref.load %[[TMP_10]][] : memref +// CHECK: memref.store %[[TMP_15]], %[[TMP_0]][%[[TMP_13]], %[[TMP_14]]] : memref<5x4xf64> +// CHECK: scf.yield +// CHECK: } +// CHECK: call @delSparseTensorCOOF64(%[[TMP_7]]) : (!llvm.ptr) -> () +// CHECK: %[[TMP_11:.*]] = bufferization.to_tensor %[[TMP_0]] : memref<5x4xf64> +// CHECK: return %[[TMP_11]] : tensor<5x4xf64> +// CHECK: } +func.func @concat_mix_dense(%arg0: tensor<2x4xf64>, %arg1: tensor<3x4xf64, #SparseMatrix>) -> tensor<5x4xf64> { + %0 = sparse_tensor.concatenate %arg0, %arg1 {dimension = 0 : index} + : tensor<2x4xf64>, tensor<3x4xf64, #SparseMatrix> to tensor<5x4xf64> + return %0 : tensor<5x4xf64> +} + +// CHECK-LABEL: func.func @concat_mix_sparse( +// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<2x4xf64>, +// CHECK-SAME: %[[TMP_arg1:.*]]: !llvm.ptr) +// CHECK-DAG: 
%[[TMP_c2:.*]] = arith.constant 2 : index +// CHECK-DAG: %[[TMP_c2_i32:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[TMP_c6_i32:.*]] = arith.constant 6 : i32 +// CHECK-DAG: %[[TMP_c3:.*]] = arith.constant 3 : index +// CHECK-DAG: %[[TMP_cst:.*]] = arith.constant 0.000000e+00 : f64 +// CHECK-DAG: %[[TMP_c4_i32:.*]] = arith.constant 4 : i32 +// CHECK-DAG: %[[TMP_c1_i32:.*]] = arith.constant 1 : i32 +// CHECK-DAG: %[[TMP_c0_i32:.*]] = arith.constant 0 : i32 +// CHECK-DAG: %[[TMP_c1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[TMP_c0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[TMP_c5:.*]] = arith.constant 5 : index +// CHECK-DAG: %[[TMP_c4:.*]] = arith.constant 4 : index +// CHECK-DAG: %[[TMP_c1_i8:.*]] = arith.constant 1 : i8 +// CHECK: %[[TMP_0:.*]] = memref.alloca() : memref<2xi8> +// CHECK: %[[TMP_1:.*]] = memref.cast %[[TMP_0]] : memref<2xi8> to memref +// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_0]][%[[TMP_c0]]] : memref<2xi8> +// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_0]][%[[TMP_c1]]] : memref<2xi8> +// CHECK: %[[TMP_2:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_3:.*]] = memref.cast %[[TMP_2]] : memref<2xindex> to memref +// CHECK: memref.store %[[TMP_c5]], %[[TMP_2]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: memref.store %[[TMP_c4]], %[[TMP_2]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_4:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_5:.*]] = memref.cast %[[TMP_4]] : memref<2xindex> to memref +// CHECK: memref.store %[[TMP_c0]], %[[TMP_4]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: memref.store %[[TMP_c1]], %[[TMP_4]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_6:.*]] = llvm.mlir.null : !llvm.ptr +// CHECK: %[[TMP_7:.*]] = call @newSparseTensor(%[[TMP_1]], %[[TMP_3]], %[[TMP_5]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c4_i32]], %[[TMP_6]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK: %[[TMP_8:.*]] = memref.alloca() : memref +// CHECK: %[[TMP_9:.*]] = 
memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_10:.*]] = memref.cast %[[TMP_9]] : memref<2xindex> to memref +// CHECK: scf.for %[[TMP_arg2:.*]] = %[[TMP_c0]] to %[[TMP_c2]] step %[[TMP_c1]] { +// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_c0]] to %[[TMP_c4]] step %[[TMP_c1]] { +// CHECK: memref.store %[[TMP_arg2]], %[[TMP_9]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: memref.store %[[TMP_arg3]], %[[TMP_9]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_22:.*]] = tensor.extract %[[TMP_arg0]][%[[TMP_arg2]], %[[TMP_arg3]]] : tensor<2x4xf64> +// CHECK: %[[TMP_23:.*]] = arith.cmpf une, %[[TMP_22]], %[[TMP_cst]] : f64 +// CHECK: scf.if %[[TMP_23]] { +// CHECK: memref.store %[[TMP_22]], %[[TMP_8]][] : memref +// CHECK: %[[TMP_24:.*]] = func.call @addEltF64(%[[TMP_7]], %[[TMP_8]], %[[TMP_10]], %[[TMP_5]]) : (!llvm.ptr, memref, memref, memref) -> !llvm.ptr +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: %[[TMP_11:.*]] = memref.alloca() : memref<2xi8> +// CHECK: %[[TMP_12:.*]] = memref.cast %[[TMP_11]] : memref<2xi8> to memref +// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_11]][%[[TMP_c0]]] : memref<2xi8> +// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_11]][%[[TMP_c1]]] : memref<2xi8> +// CHECK: %[[TMP_13:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_14:.*]] = memref.cast %[[TMP_13]] : memref<2xindex> to memref +// CHECK: memref.store %[[TMP_c3]], %[[TMP_13]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: memref.store %[[TMP_c4]], %[[TMP_13]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_15:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_16:.*]] = memref.cast %[[TMP_15]] : memref<2xindex> to memref +// CHECK: memref.store %[[TMP_c0]], %[[TMP_15]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: memref.store %[[TMP_c1]], %[[TMP_15]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_17:.*]] = call @newSparseTensor(%[[TMP_12]], %[[TMP_14]], %[[TMP_16]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c6_i32]], %[[TMP_arg1]]) : (memref, memref, 
memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK: %[[TMP_18:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_19:.*]] = memref.cast %[[TMP_18]] : memref<2xindex> to memref +// CHECK: %[[TMP_20:.*]] = memref.alloca() : memref +// CHECK: scf.while : () -> () { +// CHECK: %[[TMP_22:.*]] = func.call @getNextF64(%[[TMP_17]], %[[TMP_19]], %[[TMP_20]]) : (!llvm.ptr, memref, memref) -> i1 +// CHECK: scf.condition(%[[TMP_22]]) +// CHECK: } do { +// CHECK: %[[TMP_22:.*]] = memref.load %[[TMP_18]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: %[[TMP_23:.*]] = arith.addi %[[TMP_22]], %[[TMP_c2]] : index +// CHECK: %[[TMP_24:.*]] = memref.load %[[TMP_18]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: memref.store %[[TMP_23]], %[[TMP_9]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: memref.store %[[TMP_24]], %[[TMP_9]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_25:.*]] = func.call @addEltF64(%[[TMP_7]], %[[TMP_20]], %[[TMP_10]], %[[TMP_5]]) : (!llvm.ptr, memref, memref, memref) -> !llvm.ptr +// CHECK: scf.yield +// CHECK: } +// CHECK: call @delSparseTensorCOOF64(%[[TMP_17]]) : (!llvm.ptr) -> () +// CHECK: %[[TMP_21:.*]] = call @newSparseTensor(%[[TMP_1]], %[[TMP_3]], %[[TMP_5]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c2_i32]], %[[TMP_7]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK: call @delSparseTensorCOOF64(%[[TMP_7]]) : (!llvm.ptr) -> () +// CHECK: return %[[TMP_21]] : !llvm.ptr +// CHECK: } +func.func @concat_mix_sparse(%arg0: tensor<2x4xf64>, %arg1: tensor<3x4xf64, #SparseMatrix>) -> tensor<5x4xf64, #SparseMatrix> { + %0 = sparse_tensor.concatenate %arg0, %arg1 {dimension = 0 : index} + : tensor<2x4xf64>, tensor<3x4xf64, #SparseMatrix> to tensor<5x4xf64, #SparseMatrix> + return %0 : tensor<5x4xf64, #SparseMatrix> +} + +// CHECK-LABEL: func.func @concat_mix_sparse_perm_dim1( +// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<4x2xf64>, +// CHECK-SAME: %[[TMP_arg1:.*]]: !llvm.ptr) +// CHECK-DAG: %[[TMP_c2:.*]] = 
arith.constant 2 : index +// CHECK-DAG: %[[TMP_c2_i32:.*]] = arith.constant 2 : i32 +// CHECK-DAG: %[[TMP_c6_i32:.*]] = arith.constant 6 : i32 +// CHECK-DAG: %[[TMP_c3:.*]] = arith.constant 3 : index +// CHECK-DAG: %[[TMP_cst:.*]] = arith.constant 0.000000e+00 : f64 +// CHECK-DAG: %[[TMP_c4_i32:.*]] = arith.constant 4 : i32 +// CHECK-DAG: %[[TMP_c1_i32:.*]] = arith.constant 1 : i32 +// CHECK-DAG: %[[TMP_c0_i32:.*]] = arith.constant 0 : i32 +// CHECK-DAG: %[[TMP_c1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[TMP_c0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[TMP_c4:.*]] = arith.constant 4 : index +// CHECK-DAG: %[[TMP_c5:.*]] = arith.constant 5 : index +// CHECK-DAG: %[[TMP_c1_i8:.*]] = arith.constant 1 : i8 +// CHECK: %[[TMP_0:.*]] = memref.alloca() : memref<2xi8> +// CHECK: %[[TMP_1:.*]] = memref.cast %[[TMP_0]] : memref<2xi8> to memref +// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_0]][%[[TMP_c0]]] : memref<2xi8> +// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_0]][%[[TMP_c1]]] : memref<2xi8> +// CHECK: %[[TMP_2:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_3:.*]] = memref.cast %[[TMP_2]] : memref<2xindex> to memref +// CHECK: memref.store %[[TMP_c4]], %[[TMP_2]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: memref.store %[[TMP_c5]], %[[TMP_2]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_4:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_5:.*]] = memref.cast %[[TMP_4]] : memref<2xindex> to memref +// CHECK: memref.store %[[TMP_c1]], %[[TMP_4]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: memref.store %[[TMP_c0]], %[[TMP_4]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_6:.*]] = llvm.mlir.null : !llvm.ptr +// CHECK: %[[TMP_7:.*]] = call @newSparseTensor(%[[TMP_1]], %[[TMP_3]], %[[TMP_5]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c4_i32]], %[[TMP_6]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK: %[[TMP_8:.*]] = memref.alloca() : memref +// CHECK: %[[TMP_9:.*]] = memref.alloca() 
: memref<2xindex> +// CHECK: %[[TMP_10:.*]] = memref.cast %[[TMP_9]] : memref<2xindex> to memref +// CHECK: scf.for %[[TMP_arg2:.*]] = %[[TMP_c0]] to %[[TMP_c4]] step %[[TMP_c1]] { +// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_c0]] to %[[TMP_c2]] step %[[TMP_c1]] { +// CHECK: memref.store %[[TMP_arg2]], %[[TMP_9]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: memref.store %[[TMP_arg3]], %[[TMP_9]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_22:.*]] = tensor.extract %[[TMP_arg0]][%[[TMP_arg2]], %[[TMP_arg3]]] : tensor<4x2xf64> +// CHECK: %[[TMP_23:.*]] = arith.cmpf une, %[[TMP_22]], %[[TMP_cst]] : f64 +// CHECK: scf.if %[[TMP_23]] { +// CHECK: memref.store %[[TMP_22]], %[[TMP_8]][] : memref +// CHECK: %[[TMP_24:.*]] = func.call @addEltF64(%[[TMP_7]], %[[TMP_8]], %[[TMP_10]], %[[TMP_5]]) : (!llvm.ptr, memref, memref, memref) -> !llvm.ptr +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: %[[TMP_11:.*]] = memref.alloca() : memref<2xi8> +// CHECK: %[[TMP_12:.*]] = memref.cast %[[TMP_11]] : memref<2xi8> to memref +// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_11]][%[[TMP_c0]]] : memref<2xi8> +// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_11]][%[[TMP_c1]]] : memref<2xi8> +// CHECK: %[[TMP_13:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_14:.*]] = memref.cast %[[TMP_13]] : memref<2xindex> to memref +// CHECK: memref.store %[[TMP_c4]], %[[TMP_13]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: memref.store %[[TMP_c3]], %[[TMP_13]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_15:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_16:.*]] = memref.cast %[[TMP_15]] : memref<2xindex> to memref +// CHECK: memref.store %[[TMP_c0]], %[[TMP_15]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: memref.store %[[TMP_c1]], %[[TMP_15]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_17:.*]] = call @newSparseTensor(%[[TMP_12]], %[[TMP_14]], %[[TMP_16]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c6_i32]], %[[TMP_arg1]]) : (memref, memref, memref, i32, i32, 
i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK: %[[TMP_18:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_19:.*]] = memref.cast %[[TMP_18]] : memref<2xindex> to memref +// CHECK: %[[TMP_20:.*]] = memref.alloca() : memref +// CHECK: scf.while : () -> () { +// CHECK: %[[TMP_22:.*]] = func.call @getNextF64(%[[TMP_17]], %[[TMP_19]], %[[TMP_20]]) : (!llvm.ptr, memref, memref) -> i1 +// CHECK: scf.condition(%[[TMP_22]]) +// CHECK: } do { +// CHECK: %[[TMP_22:.*]] = memref.load %[[TMP_18]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_18]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_24:.*]] = arith.addi %[[TMP_23]], %[[TMP_c2]] : index +// CHECK: memref.store %[[TMP_22]], %[[TMP_9]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: memref.store %[[TMP_24]], %[[TMP_9]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_25:.*]] = func.call @addEltF64(%[[TMP_7]], %[[TMP_20]], %[[TMP_10]], %[[TMP_5]]) : (!llvm.ptr, memref, memref, memref) -> !llvm.ptr +// CHECK: scf.yield +// CHECK: } +// CHECK: call @delSparseTensorCOOF64(%[[TMP_17]]) : (!llvm.ptr) -> () +// CHECK: %[[TMP_21:.*]] = call @newSparseTensor(%[[TMP_1]], %[[TMP_3]], %[[TMP_5]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c2_i32]], %[[TMP_7]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK: call @delSparseTensorCOOF64(%[[TMP_7]]) : (!llvm.ptr) -> () +// CHECK: return %[[TMP_21]] : !llvm.ptr +// CHECK: } +func.func @concat_mix_sparse_perm_dim1(%arg0: tensor<4x2xf64>, %arg1: tensor<4x3xf64, #SparseMatrix_P>) -> tensor<4x5xf64, #SparseMatrix_P> { + %0 = sparse_tensor.concatenate %arg0, %arg1 {dimension = 1 : index} + : tensor<4x2xf64>, tensor<4x3xf64, #SparseMatrix_P> to tensor<4x5xf64, #SparseMatrix_P> + return %0 : tensor<4x5xf64, #SparseMatrix_P> +} + +// CHECK-LABEL: func.func @concat_mix_dense_perm_dim1( +// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<4x2xf64>, +// CHECK-SAME: %[[TMP_arg1:.*]]: !llvm.ptr) +// CHECK-DAG: %[[TMP_c2:.*]] = 
arith.constant 2 : index +// CHECK-DAG: %[[TMP_c6_i32:.*]] = arith.constant 6 : i32 +// CHECK-DAG: %[[TMP_c1_i32:.*]] = arith.constant 1 : i32 +// CHECK-DAG: %[[TMP_c0_i32:.*]] = arith.constant 0 : i32 +// CHECK-DAG: %[[TMP_c1_i8:.*]] = arith.constant 1 : i8 +// CHECK-DAG: %[[TMP_c3:.*]] = arith.constant 3 : index +// CHECK-DAG: %[[TMP_c1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[TMP_cst:.*]] = arith.constant 0.000000e+00 : f64 +// CHECK-DAG: %[[TMP_c0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[TMP_c4:.*]] = arith.constant 4 : index +// CHECK: %[[TMP_0:.*]] = memref.alloc() : memref<4x5xf64> +// CHECK: linalg.fill ins(%[[TMP_cst]] : f64) outs(%[[TMP_0]] : memref<4x5xf64>) +// CHECK: scf.for %[[TMP_arg2:.*]] = %[[TMP_c0]] to %[[TMP_c4]] step %[[TMP_c1]] { +// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_c0]] to %[[TMP_c2]] step %[[TMP_c1]] { +// CHECK: %[[TMP_12:.*]] = tensor.extract %[[TMP_arg0]][%[[TMP_arg2]], %[[TMP_arg3]]] : tensor<4x2xf64> +// CHECK: %[[TMP_13:.*]] = arith.cmpf une, %[[TMP_12]], %[[TMP_cst]] : f64 +// CHECK: scf.if %[[TMP_13]] { +// CHECK: memref.store %[[TMP_12]], %[[TMP_0]][%[[TMP_arg2]], %[[TMP_arg3]]] : memref<4x5xf64> +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: %[[TMP_1:.*]] = memref.alloca() : memref<2xi8> +// CHECK: %[[TMP_2:.*]] = memref.cast %[[TMP_1]] : memref<2xi8> to memref +// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_1]][%[[TMP_c0]]] : memref<2xi8> +// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_1]][%[[TMP_c1]]] : memref<2xi8> +// CHECK: %[[TMP_3:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_4:.*]] = memref.cast %[[TMP_3]] : memref<2xindex> to memref +// CHECK: memref.store %[[TMP_c4]], %[[TMP_3]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: memref.store %[[TMP_c3]], %[[TMP_3]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_5:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_6:.*]] = memref.cast %[[TMP_5]] : memref<2xindex> to memref +// CHECK: memref.store %[[TMP_c0]], %[[TMP_5]][%[[TMP_c0]]] 
: memref<2xindex> +// CHECK: memref.store %[[TMP_c1]], %[[TMP_5]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_7:.*]] = call @newSparseTensor(%[[TMP_2]], %[[TMP_4]], %[[TMP_6]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c6_i32]], %[[TMP_arg1]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK: %[[TMP_8:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_9:.*]] = memref.cast %[[TMP_8]] : memref<2xindex> to memref +// CHECK: %[[TMP_10:.*]] = memref.alloca() : memref +// CHECK: scf.while : () -> () { +// CHECK: %[[TMP_12:.*]] = func.call @getNextF64(%[[TMP_7]], %[[TMP_9]], %[[TMP_10]]) : (!llvm.ptr, memref, memref) -> i1 +// CHECK: scf.condition(%[[TMP_12]]) +// CHECK: } do { +// CHECK: %[[TMP_12:.*]] = memref.load %[[TMP_8]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: %[[TMP_13:.*]] = memref.load %[[TMP_8]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_14:.*]] = arith.addi %[[TMP_13]], %[[TMP_c2]] : index +// CHECK: %[[TMP_15:.*]] = memref.load %[[TMP_10]][] : memref +// CHECK: memref.store %[[TMP_15]], %[[TMP_0]][%[[TMP_12]], %[[TMP_14]]] : memref<4x5xf64> +// CHECK: scf.yield +// CHECK: } +// CHECK: call @delSparseTensorCOOF64(%[[TMP_7]]) : (!llvm.ptr) -> () +// CHECK: %[[TMP_11:.*]] = bufferization.to_tensor %[[TMP_0]] : memref<4x5xf64> +// CHECK: return %[[TMP_11]] : tensor<4x5xf64> +// CHECK: } +func.func @concat_mix_dense_perm_dim1(%arg0: tensor<4x2xf64>, %arg1: tensor<4x3xf64, #SparseMatrix_P>) -> tensor<4x5xf64> { + %0 = sparse_tensor.concatenate %arg0, %arg1 {dimension = 1 : index} + : tensor<4x2xf64>, tensor<4x3xf64, #SparseMatrix_P> to tensor<4x5xf64> + return %0 : tensor<4x5xf64> +} + +// CHECK-LABEL: func.func @concat_mix_dense_perm_dim1_dyn( +// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<3x2xf64>, +// CHECK-SAME: %[[TMP_arg1:.*]]: !llvm.ptr) +// CHECK-DAG: %[[TMP_c2:.*]] = arith.constant 2 : index +// CHECK-DAG: %[[TMP_c6_i32:.*]] = arith.constant 6 : i32 +// CHECK-DAG: %[[TMP_c1_i32:.*]] = 
arith.constant 1 : i32 +// CHECK-DAG: %[[TMP_c0_i32:.*]] = arith.constant 0 : i32 +// CHECK-DAG: %[[TMP_c1_i8:.*]] = arith.constant 1 : i8 +// CHECK-DAG: %[[TMP_cst:.*]] = arith.constant 0.000000e+00 : f64 +// CHECK-DAG: %[[TMP_c0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[TMP_c3:.*]] = arith.constant 3 : index +// CHECK-DAG: %[[TMP_c1:.*]] = arith.constant 1 : index +// CHECK: %[[TMP_0:.*]] = memref.alloc() : memref<3x5xf64> +// CHECK: %[[TMP_1:.*]] = memref.cast %[[TMP_0]] : memref<3x5xf64> to memref +// CHECK: linalg.fill ins(%[[TMP_cst]] : f64) outs(%[[TMP_0]] : memref<3x5xf64>) +// CHECK: scf.for %[[TMP_arg2:.*]] = %[[TMP_c0]] to %[[TMP_c3]] step %[[TMP_c1]] { +// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_c0]] to %[[TMP_c2]] step %[[TMP_c1]] { +// CHECK: %[[TMP_13:.*]] = tensor.extract %[[TMP_arg0]][%[[TMP_arg2]], %[[TMP_arg3]]] : tensor<3x2xf64> +// CHECK: %[[TMP_14:.*]] = arith.cmpf une, %[[TMP_13]], %[[TMP_cst]] : f64 +// CHECK: scf.if %[[TMP_14]] { +// CHECK: memref.store %[[TMP_13]], %[[TMP_0]][%[[TMP_arg2]], %[[TMP_arg3]]] : memref<3x5xf64> +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: %[[TMP_2:.*]] = memref.alloca() : memref<2xi8> +// CHECK: %[[TMP_3:.*]] = memref.cast %[[TMP_2]] : memref<2xi8> to memref +// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_2]][%[[TMP_c0]]] : memref<2xi8> +// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_2]][%[[TMP_c1]]] : memref<2xi8> +// CHECK: %[[TMP_4:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_5:.*]] = memref.cast %[[TMP_4]] : memref<2xindex> to memref +// CHECK: memref.store %[[TMP_c3]], %[[TMP_4]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: memref.store %[[TMP_c3]], %[[TMP_4]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_6:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_7:.*]] = memref.cast %[[TMP_6]] : memref<2xindex> to memref +// CHECK: memref.store %[[TMP_c0]], %[[TMP_6]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: memref.store %[[TMP_c1]], %[[TMP_6]][%[[TMP_c1]]] : memref<2xindex> 
+// CHECK: %[[TMP_8:.*]] = call @newSparseTensor(%[[TMP_3]], %[[TMP_5]], %[[TMP_7]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c6_i32]], %[[TMP_arg1]]) : (memref, memref, memref, i32, i32, i32, i32, !llvm.ptr) -> !llvm.ptr +// CHECK: %[[TMP_9:.*]] = memref.alloca() : memref<2xindex> +// CHECK: %[[TMP_10:.*]] = memref.cast %[[TMP_9]] : memref<2xindex> to memref +// CHECK: %[[TMP_11:.*]] = memref.alloca() : memref +// CHECK: scf.while : () -> () { +// CHECK: %[[TMP_13:.*]] = func.call @getNextF64(%[[TMP_8]], %[[TMP_10]], %[[TMP_11]]) : (!llvm.ptr, memref, memref) -> i1 +// CHECK: scf.condition(%[[TMP_13]]) +// CHECK: } do { +// CHECK: %[[TMP_13:.*]] = memref.load %[[TMP_9]][%[[TMP_c0]]] : memref<2xindex> +// CHECK: %[[TMP_14:.*]] = memref.load %[[TMP_9]][%[[TMP_c1]]] : memref<2xindex> +// CHECK: %[[TMP_15:.*]] = arith.addi %[[TMP_14]], %[[TMP_c2]] : index +// CHECK: %[[TMP_16:.*]] = memref.load %[[TMP_11]][] : memref +// CHECK: memref.store %[[TMP_16]], %[[TMP_0]][%[[TMP_13]], %[[TMP_15]]] : memref<3x5xf64> +// CHECK: scf.yield +// CHECK: } +// CHECK: call @delSparseTensorCOOF64(%[[TMP_8]]) : (!llvm.ptr) -> () +// CHECK: %[[TMP_12:.*]] = bufferization.to_tensor %[[TMP_1]] : memref +// CHECK: return %[[TMP_12]] : tensor +// CHECK: } +// CHECK: } +func.func @concat_mix_dense_perm_dim1_dyn(%arg0: tensor<3x2xf64>, %arg1: tensor<3x3xf64, #SparseMatrix>) -> tensor { + %0 = sparse_tensor.concatenate %arg0, %arg1 {dimension = 1 : index} + : tensor<3x2xf64>, tensor<3x3xf64, #SparseMatrix> to tensor + return %0 : tensor +} diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/concatenate.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/concatenate.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/concatenate.mlir @@ -0,0 +1,430 @@ +// RUN: mlir-opt %s --sparse-compiler | \ +// RUN: mlir-cpu-runner \ +// RUN: -e entry -entry-point-result=void \ +// RUN: 
-shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +#MAT_C_C = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}> +#MAT_D_C = #sparse_tensor.encoding<{dimLevelType = ["dense", "compressed"]}> +#MAT_C_D = #sparse_tensor.encoding<{dimLevelType = ["compressed", "dense"]}> + +#MAT_C_C_P = #sparse_tensor.encoding<{ + dimLevelType = [ "compressed", "compressed" ], + dimOrdering = affine_map<(i,j) -> (j,i)> +}> + +#MAT_C_D_P = #sparse_tensor.encoding<{ + dimLevelType = [ "compressed", "dense" ], + dimOrdering = affine_map<(i,j) -> (j,i)> +}> + +#MAT_D_C_P = #sparse_tensor.encoding<{ + dimLevelType = [ "dense", "compressed" ], + dimOrdering = affine_map<(i,j) -> (j,i)> +}> + +module { + // + // Tests without permutation. + // + + // Concats all sparse matrices (with different encodings) to a sparse matrix. + func.func @concat_sparse_sparse(%arg0: tensor<2x4xf64, #MAT_C_C>, %arg1: tensor<3x4xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64, #MAT_C_C> { + %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 0 : index} + : tensor<2x4xf64, #MAT_C_C>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C> to tensor<9x4xf64, #MAT_C_C> + return %0 : tensor<9x4xf64, #MAT_C_C> + } + + // Concats all sparse matrices (with different encodings) to a dense matrix. 
+ func.func @concat_sparse_dense(%arg0: tensor<2x4xf64, #MAT_C_C>, %arg1: tensor<3x4xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64> { + %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 0 : index} + : tensor<2x4xf64, #MAT_C_C>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C> to tensor<9x4xf64> + return %0 : tensor<9x4xf64> + } + + // Concats mix sparse and dense matrices to a sparse matrix + func.func @concat_mix_sparse(%arg0: tensor<2x4xf64>, %arg1: tensor<3x4xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64, #MAT_C_C> { + %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 0 : index} + : tensor<2x4xf64>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C> to tensor<9x4xf64, #MAT_C_C> + return %0 : tensor<9x4xf64, #MAT_C_C> + } + + // Concats mix sparse and dense matrices to a dense matrix + func.func @concat_mix_dense(%arg0: tensor<2x4xf64>, %arg1: tensor<3x4xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64> { + %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 0 : index} + : tensor<2x4xf64>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C> to tensor<9x4xf64> + return %0 : tensor<9x4xf64> + } + + // + // Tests with permutation. + // + + // Concats all sparse matrices (with different encodings) to a sparse matrix. + func.func @concat_sparse_sparse_perm(%arg0: tensor<2x4xf64, #MAT_C_C_P>, %arg1: tensor<3x4xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64, #MAT_C_C_P> { + %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 0 : index} + : tensor<2x4xf64, #MAT_C_C_P>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C> to tensor<9x4xf64, #MAT_C_C_P> + return %0 : tensor<9x4xf64, #MAT_C_C_P> + } + + // Concats all sparse matrices (with different encodings) to a dense matrix. 
+ func.func @concat_sparse_dense_perm(%arg0: tensor<2x4xf64, #MAT_C_C_P>, %arg1: tensor<3x4xf64, #MAT_C_D_P>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64> { + %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 0 : index} + : tensor<2x4xf64, #MAT_C_C_P>, tensor<3x4xf64, #MAT_C_D_P>, tensor<4x4xf64, #MAT_D_C> to tensor<9x4xf64> + return %0 : tensor<9x4xf64> + } + + // Concats mix sparse and dense matrices to a sparse matrix + func.func @concat_mix_sparse_perm(%arg0: tensor<2x4xf64>, %arg1: tensor<3x4xf64, #MAT_C_D_P>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64, #MAT_C_C> { + %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 0 : index} + : tensor<2x4xf64>, tensor<3x4xf64, #MAT_C_D_P>, tensor<4x4xf64, #MAT_D_C> to tensor<9x4xf64, #MAT_C_C> + return %0 : tensor<9x4xf64, #MAT_C_C> + } + + // Concats mix sparse and dense matrices to a dense matrix + func.func @concat_mix_dense_perm(%arg0: tensor<2x4xf64>, %arg1: tensor<3x4xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C_P>) -> tensor<9x4xf64> { + %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 0 : index} + : tensor<2x4xf64>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C_P> to tensor<9x4xf64> + return %0 : tensor<9x4xf64> + } + + // + // Tests without perumutation (concatenate on dimension 1) + // + + // Concats all sparse matrices (with different encodings) to a sparse matrix. + func.func @concat_sparse_sparse_dim1(%arg0: tensor<4x2xf64, #MAT_C_C>, %arg1: tensor<4x3xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64, #MAT_C_C> { + %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 1 : index} + : tensor<4x2xf64, #MAT_C_C>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C> to tensor<4x9xf64, #MAT_C_C> + return %0 : tensor<4x9xf64, #MAT_C_C> + } + + // Concats all sparse matrices (with different encodings) to a dense matrix. 
+ func.func @concat_sparse_dense_dim1(%arg0: tensor<4x2xf64, #MAT_C_C>, %arg1: tensor<4x3xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64> { + %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 1 : index} + : tensor<4x2xf64, #MAT_C_C>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C> to tensor<4x9xf64> + return %0 : tensor<4x9xf64> + } + + // Concats mix sparse and dense matrices to a sparse matrix + func.func @concat_mix_sparse_dim1(%arg0: tensor<4x2xf64>, %arg1: tensor<4x3xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64, #MAT_C_C> { + %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 1 : index} + : tensor<4x2xf64>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C> to tensor<4x9xf64, #MAT_C_C> + return %0 : tensor<4x9xf64, #MAT_C_C> + } + + // Concats mix sparse and dense matrices to a dense matrix + func.func @concat_mix_dense_dim1(%arg0: tensor<4x2xf64>, %arg1: tensor<4x3xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64> { + %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 1 : index} + : tensor<4x2xf64>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C> to tensor<4x9xf64> + return %0 : tensor<4x9xf64> + } + + // + // Tests with perumutation (concatenate on dimension 1) + // + + // Concats all sparse matrices (with different encodings) to a sparse matrix. + func.func @concat_sparse_sparse_perm_dim1(%arg0: tensor<4x2xf64, #MAT_C_C_P>, %arg1: tensor<4x3xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64, #MAT_C_C_P> { + %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 1 : index} + : tensor<4x2xf64, #MAT_C_C_P>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C> to tensor<4x9xf64, #MAT_C_C_P> + return %0 : tensor<4x9xf64, #MAT_C_C_P> + } + + // Concats all sparse matrices (with different encodings) to a dense matrix. 
+ func.func @concat_sparse_dense_perm_dim1(%arg0: tensor<4x2xf64, #MAT_C_C_P>, %arg1: tensor<4x3xf64, #MAT_C_D_P>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64> { + %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 1 : index} + : tensor<4x2xf64, #MAT_C_C_P>, tensor<4x3xf64, #MAT_C_D_P>, tensor<4x4xf64, #MAT_D_C> to tensor<4x9xf64> + return %0 : tensor<4x9xf64> + } + + // Concats mix sparse and dense matrices to a sparse matrix + func.func @concat_mix_sparse_perm_dim1(%arg0: tensor<4x2xf64>, %arg1: tensor<4x3xf64, #MAT_C_D_P>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64, #MAT_C_C> { + %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 1 : index} + : tensor<4x2xf64>, tensor<4x3xf64, #MAT_C_D_P>, tensor<4x4xf64, #MAT_D_C> to tensor<4x9xf64, #MAT_C_C> + return %0 : tensor<4x9xf64, #MAT_C_C> + } + + // Concats mix sparse and dense matrices to a dense matrix + func.func @concat_mix_dense_perm_dim1(%arg0: tensor<4x2xf64>, %arg1: tensor<4x3xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C_P>) -> tensor<4x9xf64> { + %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 1 : index} + : tensor<4x2xf64>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C_P> to tensor<4x9xf64> + return %0 : tensor<4x9xf64> + } + + // + // Concats mix sparse and dense matrices to a sparse matrix (with dynamic sizes) + // + func.func @concat_mix_sparse_dyn(%arg0: tensor<4x2xf64>, %arg1: tensor<4x3xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor { + %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 1 : index} + : tensor<4x2xf64>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C> to tensor + return %0 : tensor + } + + func.func @dump_mat_9x4(%A: tensor<9x4xf64, #MAT_C_C>) { + %c0 = arith.constant 0 : index + %du = arith.constant -1.0 : f64 + + %c = sparse_tensor.convert %A : tensor<9x4xf64, #MAT_C_C> to tensor<9x4xf64> + %m = bufferization.to_memref %c : memref<9x4xf64> + %v = vector.transfer_read %m[%c0, %c0], %du: 
memref<9x4xf64>, vector<9x4xf64> + vector.print %v : vector<9x4xf64> + + %1 = sparse_tensor.values %A : tensor<9x4xf64, #MAT_C_C> to memref + %2 = vector.transfer_read %1[%c0], %du: memref, vector<36xf64> + vector.print %2 : vector<36xf64> + + return + } + + func.func @dump_mat_perm_9x4(%A: tensor<9x4xf64, #MAT_C_C_P>) { + %c0 = arith.constant 0 : index + %du = arith.constant -1.0 : f64 + + %c = sparse_tensor.convert %A : tensor<9x4xf64, #MAT_C_C_P> to tensor<9x4xf64> + %m = bufferization.to_memref %c : memref<9x4xf64> + %v = vector.transfer_read %m[%c0, %c0], %du: memref<9x4xf64>, vector<9x4xf64> + vector.print %v : vector<9x4xf64> + + %1 = sparse_tensor.values %A : tensor<9x4xf64, #MAT_C_C_P> to memref + %2 = vector.transfer_read %1[%c0], %du: memref, vector<36xf64> + vector.print %2 : vector<36xf64> + + return + } + + func.func @dump_mat_dense_9x4(%A: tensor<9x4xf64>) { + %c0 = arith.constant 0 : index + %du = arith.constant -1.0 : f64 + + %m = bufferization.to_memref %A : memref<9x4xf64> + %v = vector.transfer_read %m[%c0, %c0], %du: memref<9x4xf64>, vector<9x4xf64> + vector.print %v : vector<9x4xf64> + + return + } + + func.func @dump_mat_4x9(%A: tensor<4x9xf64, #MAT_C_C>) { + %c0 = arith.constant 0 : index + %du = arith.constant -1.0 : f64 + + %c = sparse_tensor.convert %A : tensor<4x9xf64, #MAT_C_C> to tensor<4x9xf64> + %m = bufferization.to_memref %c : memref<4x9xf64> + %v = vector.transfer_read %m[%c0, %c0], %du: memref<4x9xf64>, vector<4x9xf64> + vector.print %v : vector<4x9xf64> + + %1 = sparse_tensor.values %A : tensor<4x9xf64, #MAT_C_C> to memref + %2 = vector.transfer_read %1[%c0], %du: memref, vector<36xf64> + vector.print %2 : vector<36xf64> + + return + } + + func.func @dump_mat_dyn(%A: tensor) { + %c0 = arith.constant 0 : index + %du = arith.constant -1.0 : f64 + + %c = sparse_tensor.convert %A : tensor to tensor + %m = bufferization.to_memref %c : memref + %v = vector.transfer_read %m[%c0, %c0], %du: memref, vector<4x9xf64> + vector.print %v : 
vector<4x9xf64> + + %1 = sparse_tensor.values %A : tensor to memref + %2 = vector.transfer_read %1[%c0], %du: memref, vector<36xf64> + vector.print %2 : vector<36xf64> + + return + } + + func.func @dump_mat_perm_4x9(%A: tensor<4x9xf64, #MAT_C_C_P>) { + %c0 = arith.constant 0 : index + %du = arith.constant -1.0 : f64 + + %c = sparse_tensor.convert %A : tensor<4x9xf64, #MAT_C_C_P> to tensor<4x9xf64> + %m = bufferization.to_memref %c : memref<4x9xf64> + %v = vector.transfer_read %m[%c0, %c0], %du: memref<4x9xf64>, vector<4x9xf64> + vector.print %v : vector<4x9xf64> + + %1 = sparse_tensor.values %A : tensor<4x9xf64, #MAT_C_C_P> to memref + %2 = vector.transfer_read %1[%c0], %du: memref, vector<36xf64> + vector.print %2 : vector<36xf64> + + return + } + + func.func @dump_mat_dense_4x9(%A: tensor<4x9xf64>) { + %c0 = arith.constant 0 : index + %du = arith.constant -1.0 : f64 + + %m = bufferization.to_memref %A : memref<4x9xf64> + %v = vector.transfer_read %m[%c0, %c0], %du: memref<4x9xf64>, vector<4x9xf64> + vector.print %v : vector<4x9xf64> + + return + } + + // Driver method to call and verify kernels. 
+ func.func @entry() { + %m42 = arith.constant dense< + [ [ 1.0, 0.0 ], + [ 3.1, 0.0 ], + [ 0.0, 2.0 ], + [ 0.0, 0.0 ] ]> : tensor<4x2xf64> + %m43 = arith.constant dense< + [ [ 1.0, 0.0, 1.0 ], + [ 1.0, 0.0, 0.5 ], + [ 0.0, 0.0, 1.0 ], + [ 5.0, 2.0, 0.0 ] ]> : tensor<4x3xf64> + %m24 = arith.constant dense< + [ [ 1.0, 0.0, 3.0, 0.0], + [ 0.0, 2.0, 0.0, 0.0] ]> : tensor<2x4xf64> + %m34 = arith.constant dense< + [ [ 1.0, 0.0, 1.0, 1.0], + [ 0.0, 0.5, 0.0, 0.0], + [ 1.0, 5.0, 2.0, 0.0] ]> : tensor<3x4xf64> + %m44 = arith.constant dense< + [ [ 0.0, 0.0, 1.5, 1.0], + [ 0.0, 3.5, 0.0, 0.0], + [ 1.0, 5.0, 2.0, 0.0], + [ 1.0, 0.5, 0.0, 0.0] ]> : tensor<4x4xf64> + + %sm24cc = sparse_tensor.convert %m24 : tensor<2x4xf64> to tensor<2x4xf64, #MAT_C_C> + %sm34cd = sparse_tensor.convert %m34 : tensor<3x4xf64> to tensor<3x4xf64, #MAT_C_D> + %sm42cc = sparse_tensor.convert %m42 : tensor<4x2xf64> to tensor<4x2xf64, #MAT_C_C> + %sm43cd = sparse_tensor.convert %m43 : tensor<4x3xf64> to tensor<4x3xf64, #MAT_C_D> + %sm44dc = sparse_tensor.convert %m44 : tensor<4x4xf64> to tensor<4x4xf64, #MAT_D_C> + + %sm24ccp = sparse_tensor.convert %m24 : tensor<2x4xf64> to tensor<2x4xf64, #MAT_C_C_P> + %sm34cdp = sparse_tensor.convert %m34 : tensor<3x4xf64> to tensor<3x4xf64, #MAT_C_D_P> + %sm42ccp = sparse_tensor.convert %m42 : tensor<4x2xf64> to tensor<4x2xf64, #MAT_C_C_P> + %sm43cdp = sparse_tensor.convert %m43 : tensor<4x3xf64> to tensor<4x3xf64, #MAT_C_D_P> + %sm44dcp = sparse_tensor.convert %m44 : tensor<4x4xf64> to tensor<4x4xf64, #MAT_D_C_P> + + %sm43cd_dyn = sparse_tensor.convert %m43 : tensor<4x3xf64> to tensor + %sm44dc_dyn = sparse_tensor.convert %m44 : tensor<4x4xf64> to tensor + + // CHECK: ( ( 1, 0, 3, 0 ), ( 0, 2, 0, 0 ), ( 1, 0, 1, 1 ), ( 0, 0.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 0, 0, 1.5, 1 ), ( 0, 3.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 1, 0.5, 0, 0 ) ) + // CHECK-NEXT: ( 1, 3, 2, 1, 0, 1, 1, 0, 0.5, 0, 0, 1, 5, 2, 0, 1.5, 1, 3.5, 1, 5, 2, 1, 0.5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 
) + %0 = call @concat_sparse_sparse(%sm24cc, %sm34cd, %sm44dc) + : (tensor<2x4xf64, #MAT_C_C>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64, #MAT_C_C> + call @dump_mat_9x4(%0) : (tensor<9x4xf64, #MAT_C_C>) -> () + + // CHECK-NEXT: ( ( 1, 0, 3, 0 ), ( 0, 2, 0, 0 ), ( 1, 0, 1, 1 ), ( 0, 0.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 0, 0, 1.5, 1 ), ( 0, 3.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 1, 0.5, 0, 0 ) ) + %1 = call @concat_sparse_dense(%sm24cc, %sm34cd, %sm44dc) + : (tensor<2x4xf64, #MAT_C_C>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64> + call @dump_mat_dense_9x4(%1) : (tensor<9x4xf64>) -> () + + // CHECK-NEXT: ( ( 1, 0, 3, 0 ), ( 0, 2, 0, 0 ), ( 1, 0, 1, 1 ), ( 0, 0.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 0, 0, 1.5, 1 ), ( 0, 3.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 1, 0.5, 0, 0 ) ) + // CHECK-NEXT: ( 1, 3, 2, 1, 0, 1, 1, 0, 0.5, 0, 0, 1, 5, 2, 0, 1.5, 1, 3.5, 1, 5, 2, 1, 0.5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 ) + %2 = call @concat_mix_sparse(%m24, %sm34cd, %sm44dc) + : (tensor<2x4xf64>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64, #MAT_C_C> + call @dump_mat_9x4(%2) : (tensor<9x4xf64, #MAT_C_C>) -> () + + // CHECK-NEXT: ( ( 1, 0, 3, 0 ), ( 0, 2, 0, 0 ), ( 1, 0, 1, 1 ), ( 0, 0.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 0, 0, 1.5, 1 ), ( 0, 3.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 1, 0.5, 0, 0 ) ) + %3 = call @concat_mix_dense(%m24, %sm34cd, %sm44dc) + : (tensor<2x4xf64>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64> + call @dump_mat_dense_9x4(%3) : (tensor<9x4xf64>) -> () + + // CHECK-NEXT: ( ( 1, 0, 3, 0 ), ( 0, 2, 0, 0 ), ( 1, 0, 1, 1 ), ( 0, 0.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 0, 0, 1.5, 1 ), ( 0, 3.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 1, 0.5, 0, 0 ) ) + // CHECK-NEXT: ( 1, 1, 0, 1, 1, 1, 2, 0, 0.5, 5, 3.5, 5, 0.5, 3, 1, 0, 2, 1.5, 2, 1, 0, 0, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 ) + %4 = call @concat_sparse_sparse_perm(%sm24ccp, %sm34cd, %sm44dc) + : (tensor<2x4xf64, #MAT_C_C_P>, tensor<3x4xf64, 
#MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64, #MAT_C_C_P> + call @dump_mat_perm_9x4(%4) : (tensor<9x4xf64, #MAT_C_C_P>) -> () + + // CHECK-NEXT: ( ( 1, 0, 3, 0 ), ( 0, 2, 0, 0 ), ( 1, 0, 1, 1 ), ( 0, 0.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 0, 0, 1.5, 1 ), ( 0, 3.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 1, 0.5, 0, 0 ) ) + %5 = call @concat_sparse_dense_perm(%sm24ccp, %sm34cdp, %sm44dc) + : (tensor<2x4xf64, #MAT_C_C_P>, tensor<3x4xf64, #MAT_C_D_P>, tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64> + call @dump_mat_dense_9x4(%5) : (tensor<9x4xf64>) -> () + + // CHECK-NEXT: ( ( 1, 0, 3, 0 ), ( 0, 2, 0, 0 ), ( 1, 0, 1, 1 ), ( 0, 0.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 0, 0, 1.5, 1 ), ( 0, 3.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 1, 0.5, 0, 0 ) ) + // CHECK-NEXT: ( 1, 3, 2, 1, 0, 1, 1, 0, 0.5, 0, 0, 1, 5, 2, 0, 1.5, 1, 3.5, 1, 5, 2, 1, 0.5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 ) + %6 = call @concat_mix_sparse_perm(%m24, %sm34cdp, %sm44dc) + : (tensor<2x4xf64>, tensor<3x4xf64, #MAT_C_D_P>, tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64, #MAT_C_C> + call @dump_mat_9x4(%6) : (tensor<9x4xf64, #MAT_C_C>) -> () + + // CHECK-NEXT: ( ( 1, 0, 3, 0 ), ( 0, 2, 0, 0 ), ( 1, 0, 1, 1 ), ( 0, 0.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 0, 0, 1.5, 1 ), ( 0, 3.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 1, 0.5, 0, 0 ) ) + %7 = call @concat_mix_dense_perm(%m24, %sm34cd, %sm44dcp) + : (tensor<2x4xf64>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C_P>) -> tensor<9x4xf64> + call @dump_mat_dense_9x4(%7) : (tensor<9x4xf64>) -> () + + // CHECK-NEXT: ( ( 1, 0, 1, 0, 1, 0, 0, 1.5, 1 ), ( 3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0 ), ( 0, 2, 0, 0, 1, 1, 5, 2, 0 ), ( 0, 0, 5, 2, 0, 1, 0.5, 0, 0 ) ) + // CHECK-NEXT: ( 1, 1, 0, 1, 1.5, 1, 3.1, 1, 0, 0.5, 3.5, 2, 0, 0, 1, 1, 5, 2, 5, 2, 0, 1, 0.5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 ) + %8 = call @concat_sparse_sparse_dim1(%sm42cc, %sm43cd, %sm44dc) + : (tensor<4x2xf64, #MAT_C_C>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64, #MAT_C_C> + call @dump_mat_4x9(%8) : 
(tensor<4x9xf64, #MAT_C_C>) -> () + + // CHECK-NEXT: ( ( 1, 0, 1, 0, 1, 0, 0, 1.5, 1 ), ( 3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0 ), ( 0, 2, 0, 0, 1, 1, 5, 2, 0 ), ( 0, 0, 5, 2, 0, 1, 0.5, 0, 0 ) ) + %9 = call @concat_sparse_dense_dim1(%sm42cc, %sm43cd, %sm44dc) + : (tensor<4x2xf64, #MAT_C_C>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64> + call @dump_mat_dense_4x9(%9) : (tensor<4x9xf64>) -> () + + // CHECK-NEXT: ( ( 1, 0, 1, 0, 1, 0, 0, 1.5, 1 ), ( 3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0 ), ( 0, 2, 0, 0, 1, 1, 5, 2, 0 ), ( 0, 0, 5, 2, 0, 1, 0.5, 0, 0 ) ) + // CHECK-NEXT: ( 1, 1, 0, 1, 1.5, 1, 3.1, 1, 0, 0.5, 3.5, 2, 0, 0, 1, 1, 5, 2, 5, 2, 0, 1, 0.5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 ) + %10 = call @concat_mix_sparse_dim1(%m42, %sm43cd, %sm44dc) + : (tensor<4x2xf64>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64, #MAT_C_C> + call @dump_mat_4x9(%10) : (tensor<4x9xf64, #MAT_C_C>) -> () + + // CHECK-NEXT: ( ( 1, 0, 1, 0, 1, 0, 0, 1.5, 1 ), ( 3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0 ), ( 0, 2, 0, 0, 1, 1, 5, 2, 0 ), ( 0, 0, 5, 2, 0, 1, 0.5, 0, 0 ) ) + %11 = call @concat_mix_dense_dim1(%m42, %sm43cd, %sm44dc) + : (tensor<4x2xf64>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64> + call @dump_mat_dense_4x9(%11) : (tensor<4x9xf64>) -> () + + // CHECK-NEXT: ( ( 1, 0, 1, 0, 1, 0, 0, 1.5, 1 ), ( 3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0 ), ( 0, 2, 0, 0, 1, 1, 5, 2, 0 ), ( 0, 0, 5, 2, 0, 1, 0.5, 0, 0 ) ) + // CHECK-NEXT: ( 1, 3.1, 2, 1, 1, 0, 5, 0, 0, 0, 2, 1, 0.5, 1, 0, 1, 1, 3.5, 5, 0.5, 1.5, 2, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 ) + %12 = call @concat_sparse_sparse_perm_dim1(%sm42ccp, %sm43cd, %sm44dc) + : (tensor<4x2xf64, #MAT_C_C_P>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64, #MAT_C_C_P> + call @dump_mat_perm_4x9(%12) : (tensor<4x9xf64, #MAT_C_C_P>) -> () + + // CHECK-NEXT: ( ( 1, 0, 1, 0, 1, 0, 0, 1.5, 1 ), ( 3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0 ), ( 0, 2, 0, 0, 1, 1, 
5, 2, 0 ), ( 0, 0, 5, 2, 0, 1, 0.5, 0, 0 ) ) + %13 = call @concat_sparse_dense_perm_dim1(%sm42ccp, %sm43cdp, %sm44dc) + : (tensor<4x2xf64, #MAT_C_C_P>, tensor<4x3xf64, #MAT_C_D_P>, tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64> + call @dump_mat_dense_4x9(%13) : (tensor<4x9xf64>) -> () + + // CHECK-NEXT: ( ( 1, 0, 1, 0, 1, 0, 0, 1.5, 1 ), ( 3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0 ), ( 0, 2, 0, 0, 1, 1, 5, 2, 0 ), ( 0, 0, 5, 2, 0, 1, 0.5, 0, 0 ) ) + // CHECK-NEXT: ( 1, 1, 0, 1, 1.5, 1, 3.1, 1, 0, 0.5, 3.5, 2, 0, 0, 1, 1, 5, 2, 5, 2, 0, 1, 0.5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 ) + %14 = call @concat_mix_sparse_perm_dim1(%m42, %sm43cdp, %sm44dc) + : (tensor<4x2xf64>, tensor<4x3xf64, #MAT_C_D_P>, tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64, #MAT_C_C> + call @dump_mat_4x9(%14) : (tensor<4x9xf64, #MAT_C_C>) -> () + + // CHECK-NEXT: ( ( 1, 0, 1, 0, 1, 0, 0, 1.5, 1 ), ( 3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0 ), ( 0, 2, 0, 0, 1, 1, 5, 2, 0 ), ( 0, 0, 5, 2, 0, 1, 0.5, 0, 0 ) ) + %15 = call @concat_mix_dense_perm_dim1(%m42, %sm43cd, %sm44dcp) + : (tensor<4x2xf64>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C_P>) -> tensor<4x9xf64> + call @dump_mat_dense_4x9(%15) : (tensor<4x9xf64>) -> () + + // CHECK-NEXT: ( ( 1, 0, 1, 0, 1, 0, 0, 1.5, 1 ), ( 3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0 ), ( 0, 2, 0, 0, 1, 1, 5, 2, 0 ), ( 0, 0, 5, 2, 0, 1, 0.5, 0, 0 ) ) + // CHECK-NEXT: ( 1, 1, 0, 1, 1.5, 1, 3.1, 1, 0, 0.5, 3.5, 2, 0, 0, 1, 1, 5, 2, 5, 2, 0, 1, 0.5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 ) + %16 = call @concat_mix_sparse_dyn(%m42, %sm43cd, %sm44dc) + : (tensor<4x2xf64>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor + call @dump_mat_dyn(%16) : (tensor) -> () + + // Release resources. 
+ bufferization.dealloc_tensor %sm24cc : tensor<2x4xf64, #MAT_C_C> + bufferization.dealloc_tensor %sm34cd : tensor<3x4xf64, #MAT_C_D> + bufferization.dealloc_tensor %sm42cc : tensor<4x2xf64, #MAT_C_C> + bufferization.dealloc_tensor %sm43cd : tensor<4x3xf64, #MAT_C_D> + bufferization.dealloc_tensor %sm44dc : tensor<4x4xf64, #MAT_D_C> + bufferization.dealloc_tensor %sm24ccp : tensor<2x4xf64, #MAT_C_C_P> + bufferization.dealloc_tensor %sm34cdp : tensor<3x4xf64, #MAT_C_D_P> + bufferization.dealloc_tensor %sm42ccp : tensor<4x2xf64, #MAT_C_C_P> + bufferization.dealloc_tensor %sm43cdp : tensor<4x3xf64, #MAT_C_D_P> + bufferization.dealloc_tensor %sm44dcp : tensor<4x4xf64, #MAT_D_C_P> + bufferization.dealloc_tensor %0 : tensor<9x4xf64, #MAT_C_C> + bufferization.dealloc_tensor %1 : tensor<9x4xf64> + bufferization.dealloc_tensor %2 : tensor<9x4xf64, #MAT_C_C> + bufferization.dealloc_tensor %3 : tensor<9x4xf64> + bufferization.dealloc_tensor %4 : tensor<9x4xf64, #MAT_C_C_P> + bufferization.dealloc_tensor %5 : tensor<9x4xf64> + bufferization.dealloc_tensor %6 : tensor<9x4xf64, #MAT_C_C> + bufferization.dealloc_tensor %7 : tensor<9x4xf64> + bufferization.dealloc_tensor %8 : tensor<4x9xf64, #MAT_C_C> + bufferization.dealloc_tensor %9 : tensor<4x9xf64> + bufferization.dealloc_tensor %10 : tensor<4x9xf64, #MAT_C_C> + bufferization.dealloc_tensor %11 : tensor<4x9xf64> + bufferization.dealloc_tensor %12 : tensor<4x9xf64, #MAT_C_C_P> + bufferization.dealloc_tensor %13 : tensor<4x9xf64> + bufferization.dealloc_tensor %14 : tensor<4x9xf64, #MAT_C_C> + bufferization.dealloc_tensor %15 : tensor<4x9xf64> + bufferization.dealloc_tensor %16 : tensor + return + } +} diff --git a/mlir/test/Target/SPIRV/joint-matrix-ops.mlir b/mlir/test/Target/SPIRV/joint-matrix-ops.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Target/SPIRV/joint-matrix-ops.mlir @@ -0,0 +1,102 @@ +// RUN: mlir-translate -test-spirv-roundtrip -split-input-file %s | FileCheck %s + +spv.module Logical GLSL450 requires 
#spv.vce { + // CHECK-LABEL: @joint_matrix_load + spv.func @joint_matrix_load(%ptr : !spv.ptr, %stride : i32) "None" { + // CHECK: {{%.*}} = spv.JointMatrixLoadINTEL {{%.*}}, {{%.*}} : (!spv.ptr, i32) -> !spv.jointmatrix<16x8xi32, RowMajor, Workgroup> + %0 = spv.JointMatrixLoadINTEL %ptr, %stride : (!spv.ptr, i32) -> !spv.jointmatrix<16x8xi32, RowMajor, Workgroup> + spv.Return + } + + // CHECK-LABEL: @joint_matrix_load_memaccess + spv.func @joint_matrix_load_memaccess(%ptr : !spv.ptr, %stride : i32) "None" { + // CHECK: {{%.*}} = spv.JointMatrixLoadINTEL {{%.*}}, {{%.*}} {memory_access = #spv.memory_access} : (!spv.ptr, i32) -> !spv.jointmatrix<8x16xi32, RowMajor, Subgroup> + %0 = spv.JointMatrixLoadINTEL %ptr, %stride {memory_access = #spv.memory_access} : (!spv.ptr, i32) -> !spv.jointmatrix<8x16xi32, RowMajor, Subgroup> + spv.Return + } + + // CHECK-LABEL: @joint_matrix_store + spv.func @joint_matrix_store(%ptr : !spv.ptr, %stride : i32, %m : !spv.jointmatrix<16x8xi32, RowMajor, Workgroup>) "None" { + // CHECK: spv.JointMatrixStoreINTEL {{%.*}}, {{%.*}}, {{%.*}} : (!spv.ptr, !spv.jointmatrix<16x8xi32, RowMajor, Workgroup>, i32) + spv.JointMatrixStoreINTEL %ptr, %m, %stride : (!spv.ptr, !spv.jointmatrix<16x8xi32, RowMajor, Workgroup>, i32) + spv.Return + } + + // CHECK-LABEL: @joint_matrix_store_memaccess + spv.func @joint_matrix_store_memaccess(%ptr : !spv.ptr, %m : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup>, %stride : i32) "None" { + // CHECK: spv.JointMatrixStoreINTEL {{%.*}}, {{%.*}}, {{%.*}} {memory_access = #spv.memory_access} : (!spv.ptr, !spv.jointmatrix<8x16xi32, RowMajor, Subgroup>, i32) + spv.JointMatrixStoreINTEL %ptr, %m, %stride {memory_access = #spv.memory_access} : (!spv.ptr, !spv.jointmatrix<8x16xi32, RowMajor, Subgroup>, i32) + spv.Return + } + + // CHECK-LABEL: @joint_matrix_length + spv.func @joint_matrix_length() -> i32 "None" { + // CHECK: {{%.*}} = spv.JointMatrixWorkItemLengthINTEL : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup> + %0 = 
spv.JointMatrixWorkItemLengthINTEL : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup> + spv.ReturnValue %0 : i32 + } + + // CHECK-LABEL: @joint_matrix_muladd + spv.func @joint_matrix_muladd(%a : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup>, %b : !spv.jointmatrix<16x8xi32, RowMajor, Subgroup>, %c : !spv.jointmatrix<8x8xi32, RowMajor, Subgroup>) "None" { + // CHECK: {{%.*}} = spv.JointMatrixMadINTEL {{%.*}}, {{%.*}}, {{%.*}} : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup>, !spv.jointmatrix<16x8xi32, RowMajor, Subgroup> -> !spv.jointmatrix<8x8xi32, RowMajor, Subgroup> + %r = spv.JointMatrixMadINTEL %a, %b, %c : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup>, !spv.jointmatrix<16x8xi32, RowMajor, Subgroup> -> !spv.jointmatrix<8x8xi32, RowMajor, Subgroup> + spv.Return + } + + // CHECK-LABEL: @joint_matrix_add + spv.func @joint_matrix_add(%a : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup>, %b : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup>) "None" { + // CHECK: {{%.*}} = spv.IAdd {{%.*}}, {{%.*}} : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup> + %r = spv.IAdd %a, %b : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup> + spv.Return + } + + // CHECK-LABEL: @joint_matrix_sub + spv.func @joint_matrix_sub(%a : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup>, %b : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup>) "None" { + // CHECK: {{%.*}} = spv.ISub {{%.*}}, {{%.*}} : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup> + %r = spv.ISub %a, %b : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup> + spv.Return + } + + // CHECK-LABEL: @joint_matrix_sdiv + spv.func @joint_matrix_sdiv(%a : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup>, %b : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup>) "None" { + // CHECK: {{%.*}} = spv.SDiv {{%.*}}, {{%.*}} : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup> + %r = spv.SDiv %a, %b : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup> + spv.Return + } + + // CHECK-LABEL: @joint_matrix_udiv + spv.func @joint_matrix_udiv(%a : !spv.jointmatrix<8x16xi32, RowMajor, 
Subgroup>, %b : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup>) "None" { + // CHECK: {{%.*}} = spv.UDiv {{%.*}}, {{%.*}} : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup> + %r = spv.UDiv %a, %b : !spv.jointmatrix<8x16xi32, RowMajor, Subgroup> + spv.Return + } + + // CHECK-LABEL: @joint_matrix_fadd + spv.func @joint_matrix_fadd(%a : !spv.jointmatrix<8x16xf32, RowMajor, Subgroup>, %b : !spv.jointmatrix<8x16xf32, RowMajor, Subgroup>) "None" { + // CHECK: {{%.*}} = spv.FAdd {{%.*}}, {{%.*}} : !spv.jointmatrix<8x16xf32, RowMajor, Subgroup> + %r = spv.FAdd %a, %b : !spv.jointmatrix<8x16xf32, RowMajor, Subgroup> + spv.Return + } + + // CHECK-LABEL: @joint_matrix_fsub + spv.func @joint_matrix_fsub(%a : !spv.jointmatrix<8x16xf32, RowMajor, Subgroup>, %b : !spv.jointmatrix<8x16xf32, RowMajor, Subgroup>) "None" { + // CHECK: {{%.*}} = spv.FSub {{%.*}}, {{%.*}} : !spv.jointmatrix<8x16xf32, RowMajor, Subgroup> + %r = spv.FSub %a, %b : !spv.jointmatrix<8x16xf32, RowMajor, Subgroup> + spv.Return + } + + // CHECK-LABEL: @joint_matrix_fdiv + spv.func @joint_matrix_fdiv(%a : !spv.jointmatrix<8x16xf32, RowMajor, Subgroup>, %b : !spv.jointmatrix<8x16xf32, RowMajor, Subgroup>) "None" { + // CHECK: {{%.*}} = spv.FDiv {{%.*}}, {{%.*}} : !spv.jointmatrix<8x16xf32, RowMajor, Subgroup> + %r = spv.FDiv %a, %b : !spv.jointmatrix<8x16xf32, RowMajor, Subgroup> + spv.Return + } + + // CHECK-LABEL: @joint_matrix_access_chain + spv.func @joint_matrix_access_chain(%a : !spv.ptr, Function>) -> !spv.ptr "None" { + %0 = spv.Constant 0: i32 + // CHECK: {{%.*}} = spv.AccessChain {{%.*}}[{{%.*}}] : !spv.ptr, Function>, i32 + %1 = spv.AccessChain %a[%0] : !spv.ptr, Function>, i32 + spv.ReturnValue %1 : !spv.ptr + } +} diff --git a/mlir/test/Target/SPIRV/memory-ops.mlir b/mlir/test/Target/SPIRV/memory-ops.mlir --- a/mlir/test/Target/SPIRV/memory-ops.mlir +++ b/mlir/test/Target/SPIRV/memory-ops.mlir @@ -1,15 +1,25 @@ // RUN: mlir-translate -test-spirv-roundtrip -split-input-file %s | FileCheck %s -// CHECK: 
spv.func {{@.*}}([[ARG1:%.*]]: !spv.ptr, [[ARG2:%.*]]: !spv.ptr) "None" { -// CHECK-NEXT: [[VALUE:%.*]] = spv.Load "Input" [[ARG1]] : f32 -// CHECK-NEXT: spv.Store "Output" [[ARG2]], [[VALUE]] : f32 spv.module Logical GLSL450 requires #spv.vce { + // CHECK-LABEL: spv.func @load_store + // CHECK-SAME: ([[ARG1:%.*]]: !spv.ptr, [[ARG2:%.*]]: !spv.ptr) spv.func @load_store(%arg0 : !spv.ptr, %arg1 : !spv.ptr) "None" { + // CHECK-NEXT: [[VALUE:%.*]] = spv.Load "Input" [[ARG1]] : f32 %1 = spv.Load "Input" %arg0 : f32 + // CHECK-NEXT: spv.Store "Output" [[ARG2]], [[VALUE]] : f32 spv.Store "Output" %arg1, %1 : f32 spv.Return } + + // CHECK-LABEL: spv.func @load_store_memory_operands + spv.func @load_store_memory_operands(%arg0 : !spv.ptr, %arg1 : !spv.ptr) "None" { + // CHECK: spv.Load "Input" %{{.+}} ["Volatile|Aligned", 4] : f32 + %1 = spv.Load "Input" %arg0 ["Volatile|Aligned", 4]: f32 + // CHECK: spv.Store "Output" %{{.+}}, %{{.+}} ["Volatile|Aligned", 4] : f32 + spv.Store "Output" %arg1, %1 ["Volatile|Aligned", 4]: f32 + spv.Return + } } // ----- diff --git a/mlir/test/Transforms/test-toposort.mlir b/mlir/test/Transforms/test-toposort.mlir --- a/mlir/test/Transforms/test-toposort.mlir +++ b/mlir/test/Transforms/test-toposort.mlir @@ -1,27 +1,39 @@ // RUN: mlir-opt -topological-sort %s | FileCheck %s +// RUN: mlir-opt -test-topological-sort-analysis %s | FileCheck %s -check-prefix=CHECK-ANALYSIS // Test producer is after user. 
// CHECK-LABEL: test.graph_region -test.graph_region { +// CHECK-ANALYSIS-LABEL: test.graph_region +test.graph_region attributes{"root"} { // CHECK-NEXT: test.foo // CHECK-NEXT: test.baz // CHECK-NEXT: test.bar - %0 = "test.foo"() : () -> i32 - "test.bar"(%1, %0) : (i32, i32) -> () - %1 = "test.baz"() : () -> i32 + + // CHECK-ANALYSIS-NEXT: test.foo{{.*}} {pos = 0 + // CHECK-ANALYSIS-NEXT: test.bar{{.*}} {pos = 2 + // CHECK-ANALYSIS-NEXT: test.baz{{.*}} {pos = 1 + %0 = "test.foo"() {selected} : () -> i32 + "test.bar"(%1, %0) {selected} : (i32, i32) -> () + %1 = "test.baz"() {selected} : () -> i32 } // Test cycles. // CHECK-LABEL: test.graph_region -test.graph_region { +// CHECK-ANALYSIS-LABEL: test.graph_region +test.graph_region attributes{"root"} { // CHECK-NEXT: test.d // CHECK-NEXT: test.a // CHECK-NEXT: test.c // CHECK-NEXT: test.b - %2 = "test.c"(%1) : (i32) -> i32 + + // CHECK-ANALYSIS-NEXT: test.c{{.*}} {pos = 0 + // CHECK-ANALYSIS-NEXT: test.b{{.*}} : ( + // CHECK-ANALYSIS-NEXT: test.a{{.*}} {pos = 2 + // CHECK-ANALYSIS-NEXT: test.d{{.*}} {pos = 1 + %2 = "test.c"(%1) {selected} : (i32) -> i32 %1 = "test.b"(%0, %2) : (i32, i32) -> i32 - %0 = "test.a"(%3) : (i32) -> i32 - %3 = "test.d"() : () -> i32 + %0 = "test.a"(%3) {selected} : (i32) -> i32 + %3 = "test.d"() {selected} : () -> i32 } // Test block arguments. diff --git a/mlir/test/lib/Analysis/DataFlow/TestDenseDataFlowAnalysis.cpp b/mlir/test/lib/Analysis/DataFlow/TestDenseDataFlowAnalysis.cpp --- a/mlir/test/lib/Analysis/DataFlow/TestDenseDataFlowAnalysis.cpp +++ b/mlir/test/lib/Analysis/DataFlow/TestDenseDataFlowAnalysis.cpp @@ -61,9 +61,6 @@ /// The lattice is always initialized. bool isUninitialized() const override { return false; } - /// Initialize the lattice. Does nothing. - ChangeResult defaultInitialize() override { return ChangeResult::NoChange; } - /// Mark the lattice as having reached its pessimistic fixpoint. That is, the /// last modifications of all memory resources are unknown. 
ChangeResult reset() override { @@ -73,9 +70,6 @@ return ChangeResult::Change; } - /// The lattice is never at a fixpoint. - bool isAtFixpoint() const override { return false; } - /// Join the last modifications. ChangeResult join(const AbstractDenseLattice &lattice) override { const auto &rhs = static_cast(lattice); diff --git a/mlir/test/lib/Analysis/TestDataFlowFramework.cpp b/mlir/test/lib/Analysis/TestDataFlowFramework.cpp --- a/mlir/test/lib/Analysis/TestDataFlowFramework.cpp +++ b/mlir/test/lib/Analysis/TestDataFlowFramework.cpp @@ -20,9 +20,6 @@ using AnalysisState::AnalysisState; - /// Default-initialize the state to zero. - ChangeResult defaultInitialize() override { return join(0); } - /// Returns true if the state is uninitialized. bool isUninitialized() const override { return !state; } diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp --- a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp +++ b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp @@ -249,14 +249,16 @@ template static SmallVector -getGpuProcIds(OpBuilder &b, Location loc, ArrayRef parallelLoopRanges) { +getGpuProcIds(OpBuilder &b, Location loc, ArrayRef parallelLoopRanges, + ArrayRef distributionMethod) { size_t count = std::min(3, parallelLoopRanges.size()); SmallVector procInfo(count); Type indexType = b.getIndexType(); for (unsigned i = 0; i < count; ++i) { gpu::Dimension dim = *gpu::symbolizeDimension(i); procInfo[count - 1 - i] = {b.create(loc, indexType, dim), - b.create(loc, indexType, dim)}; + b.create(loc, indexType, dim), + distributionMethod[count - 1 - i]}; } return procInfo; } @@ -265,10 +267,15 @@ RewritePatternSet &patterns) { { LinalgLoopDistributionOptions cyclicNprocsEqNiters; - cyclicNprocsEqNiters.distributionMethod.resize( - 2, DistributionMethod::CyclicNumProcsEqNumIters); + SmallVector distributionMethod = { + DistributionMethod::CyclicNumProcsEqNumIters, + 
DistributionMethod::CyclicNumProcsEqNumIters}; cyclicNprocsEqNiters.procInfo = - getGpuProcIds; + [distributionMethod](OpBuilder &b, Location loc, + ArrayRef parallelLoopRanges) { + return getGpuProcIds( + b, loc, parallelLoopRanges, distributionMethod); + }; patterns.add( MatmulOp::getOperationName(), context, LinalgTilingOptions() @@ -282,10 +289,15 @@ { LinalgLoopDistributionOptions cyclicNprocsGeNiters; - cyclicNprocsGeNiters.distributionMethod.resize( - 2, DistributionMethod::CyclicNumProcsGeNumIters); + SmallVector distributionMethod = { + DistributionMethod::CyclicNumProcsGeNumIters, + DistributionMethod::CyclicNumProcsGeNumIters}; cyclicNprocsGeNiters.procInfo = - getGpuProcIds; + [distributionMethod](OpBuilder &b, Location loc, + ArrayRef parallelLoopRanges) { + return getGpuProcIds( + b, loc, parallelLoopRanges, distributionMethod); + }; patterns.add( MatmulOp::getOperationName(), context, LinalgTilingOptions() @@ -299,10 +311,14 @@ { LinalgLoopDistributionOptions cyclicNprocsDefault; - cyclicNprocsDefault.distributionMethod.resize(2, - DistributionMethod::Cyclic); + SmallVector distributionMethod = { + DistributionMethod::Cyclic, DistributionMethod::Cyclic}; cyclicNprocsDefault.procInfo = - getGpuProcIds; + [distributionMethod](OpBuilder &b, Location loc, + ArrayRef parallelLoopRanges) { + return getGpuProcIds( + b, loc, parallelLoopRanges, distributionMethod); + }; patterns.add( MatmulOp::getOperationName(), context, LinalgTilingOptions() @@ -316,10 +332,15 @@ { LinalgLoopDistributionOptions cyclicNprocsMixed1; - cyclicNprocsMixed1.distributionMethod = { + SmallVector distributionMethod = { DistributionMethod::CyclicNumProcsEqNumIters, DistributionMethod::CyclicNumProcsGeNumIters}; - cyclicNprocsMixed1.procInfo = getGpuProcIds; + cyclicNprocsMixed1.procInfo = + [distributionMethod](OpBuilder &b, Location loc, + ArrayRef parallelLoopRanges) { + return getGpuProcIds( + b, loc, parallelLoopRanges, distributionMethod); + }; patterns.add( 
MatmulOp::getOperationName(), context, LinalgTilingOptions() @@ -333,10 +354,15 @@ { LinalgLoopDistributionOptions cyclicNprocsMixed2; - cyclicNprocsMixed2.distributionMethod = { + SmallVector distributionMethod = { DistributionMethod::CyclicNumProcsGeNumIters, DistributionMethod::Cyclic}; - cyclicNprocsMixed2.procInfo = getGpuProcIds; + cyclicNprocsMixed2.procInfo = + [distributionMethod](OpBuilder &b, Location loc, + ArrayRef parallelLoopRanges) { + return getGpuProcIds( + b, loc, parallelLoopRanges, distributionMethod); + }; patterns.add( MatmulOp::getOperationName(), context, LinalgTilingOptions() @@ -350,10 +376,15 @@ { LinalgLoopDistributionOptions cyclicNprocsMixed3; - cyclicNprocsMixed3.distributionMethod = { + SmallVector distributionMethod = { DistributionMethod::Cyclic, DistributionMethod::CyclicNumProcsEqNumIters}; - cyclicNprocsMixed3.procInfo = getGpuProcIds; + cyclicNprocsMixed3.procInfo = + [distributionMethod](OpBuilder &b, Location loc, + ArrayRef parallelLoopRanges) { + return getGpuProcIds( + b, loc, parallelLoopRanges, distributionMethod); + }; patterns.add( MatmulOp::getOperationName(), context, @@ -368,10 +399,14 @@ { LinalgLoopDistributionOptions cyclicNprocsEqNiters; - cyclicNprocsEqNiters.distributionMethod.resize(2, - DistributionMethod::Cyclic); + SmallVector distributionMethod = { + DistributionMethod::Cyclic, DistributionMethod::Cyclic}; cyclicNprocsEqNiters.procInfo = - getGpuProcIds; + [distributionMethod](OpBuilder &b, Location loc, + ArrayRef parallelLoopRanges) { + return getGpuProcIds( + b, loc, parallelLoopRanges, distributionMethod); + }; patterns.add( MatmulOp::getOperationName(), context, LinalgTilingOptions() @@ -387,8 +422,14 @@ static void fillTileFuseAndDistributePatterns(MLIRContext *context, RewritePatternSet &patterns) { LinalgLoopDistributionOptions cyclicNprocsEqNiters; - cyclicNprocsEqNiters.distributionMethod.resize(2, DistributionMethod::Cyclic); - cyclicNprocsEqNiters.procInfo = getGpuProcIds; + SmallVector 
distributionMethod = { + DistributionMethod::Cyclic, DistributionMethod::Cyclic}; + cyclicNprocsEqNiters.procInfo = + [distributionMethod](OpBuilder &b, Location loc, + ArrayRef parallelLoopRanges) { + return getGpuProcIds( + b, loc, parallelLoopRanges, distributionMethod); + }; patterns.add( MatmulOp::getOperationName(), context, LinalgTilingAndFusionOptions() diff --git a/mlir/test/lib/Transforms/CMakeLists.txt b/mlir/test/lib/Transforms/CMakeLists.txt --- a/mlir/test/lib/Transforms/CMakeLists.txt +++ b/mlir/test/lib/Transforms/CMakeLists.txt @@ -5,6 +5,7 @@ TestControlFlowSink.cpp TestInlining.cpp TestIntRangeInference.cpp + TestTopologicalSort.cpp EXCLUDE_FROM_LIBMLIR diff --git a/mlir/test/lib/Transforms/TestTopologicalSort.cpp b/mlir/test/lib/Transforms/TestTopologicalSort.cpp new file mode 100644 --- /dev/null +++ b/mlir/test/lib/Transforms/TestTopologicalSort.cpp @@ -0,0 +1,62 @@ +//===- TestTopologicalSort.cpp - Pass to test topological sort analysis ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/TopologicalSortUtils.h" + +using namespace mlir; + +namespace { +struct TestTopologicalSortAnalysisPass + : public PassWrapper> { + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestTopologicalSortAnalysisPass) + + StringRef getArgument() const final { + return "test-topological-sort-analysis"; + } + StringRef getDescription() const final { + return "Test topological sorting of ops"; + } + + void runOnOperation() override { + Operation *op = getOperation(); + OpBuilder builder(op->getContext()); + + op->walk([&](Operation *root) { + if (!root->hasAttr("root")) + return WalkResult::advance(); + + assert(root->getNumRegions() == 1 && root->getRegion(0).hasOneBlock() && + "expected one block"); + Block *block = &root->getRegion(0).front(); + SmallVector selectedOps; + block->walk([&](Operation *op) { + if (op->hasAttr("selected")) + selectedOps.push_back(op); + }); + + computeTopologicalSorting(block, selectedOps); + for (const auto &it : llvm::enumerate(selectedOps)) + it.value()->setAttr("pos", builder.getIndexAttr(it.index())); + + return WalkResult::advance(); + }); + } +}; +} // namespace + +namespace mlir { +namespace test { +void registerTestTopologicalSortAnalysisPass() { + PassRegistration(); +} +} // namespace test +} // namespace mlir diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -111,6 +111,7 @@ void registerTestSliceAnalysisPass(); void registerTestTensorTransforms(); void registerTestTilingInterface(); +void registerTestTopologicalSortAnalysisPass(); void registerTestTransformDialectInterpreterPass(); void registerTestVectorLowerings(); void registerTestNvgpuLowerings(); @@ 
-207,6 +208,7 @@ mlir::test::registerTestSliceAnalysisPass(); mlir::test::registerTestTensorTransforms(); mlir::test::registerTestTilingInterface(); + mlir::test::registerTestTopologicalSortAnalysisPass(); mlir::test::registerTestTransformDialectInterpreterPass(); mlir::test::registerTestVectorLowerings(); mlir::test::registerTestNvgpuLowerings(); diff --git a/mlir/tools/mlir-pdll/CMakeLists.txt b/mlir/tools/mlir-pdll/CMakeLists.txt --- a/mlir/tools/mlir-pdll/CMakeLists.txt +++ b/mlir/tools/mlir-pdll/CMakeLists.txt @@ -12,7 +12,9 @@ MLIRPDLLParser ) -add_tablegen(mlir-pdll MLIR_PDLL DESTINATION "${MLIR_TOOLS_INSTALL_DIR}" +add_tablegen(mlir-pdll MLIR_PDLL + DESTINATION "${MLIR_TOOLS_INSTALL_DIR}" + EXPORT MLIR mlir-pdll.cpp DEPENDS diff --git a/mlir/tools/mlir-tblgen/CMakeLists.txt b/mlir/tools/mlir-tblgen/CMakeLists.txt --- a/mlir/tools/mlir-tblgen/CMakeLists.txt +++ b/mlir/tools/mlir-tblgen/CMakeLists.txt @@ -4,7 +4,9 @@ TableGen ) -add_tablegen(mlir-tblgen MLIR DESTINATION "${MLIR_TOOLS_INSTALL_DIR}" +add_tablegen(mlir-tblgen MLIR + DESTINATION "${MLIR_TOOLS_INSTALL_DIR}" + EXPORT MLIR AttrOrTypeDefGen.cpp AttrOrTypeFormatGen.cpp CodeGenHelpers.cpp diff --git a/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp b/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp --- a/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp +++ b/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp @@ -518,7 +518,8 @@ os << tabs << formatv("if (auto attr = {0}->getAttr(\"{1}\")) {{\n", opVar, attrName); if (attr.getAttrDefName() == "SPV_ScopeAttr" || - attr.getAttrDefName() == "SPV_MemorySemanticsAttr") { + attr.getAttrDefName() == "SPV_MemorySemanticsAttr" || + attr.getAttrDefName() == "SPV_MatrixLayoutAttr") { // These two enums are encoded as to constant values in SPIR-V blob, // but we directly use the constant value as attribute in SPIR-V dialect. So // need to handle them separately from normal enum attributes. 
@@ -810,7 +811,8 @@ StringRef words, StringRef wordIndex, raw_ostream &os) { if (attr.getAttrDefName() == "SPV_ScopeAttr" || - attr.getAttrDefName() == "SPV_MemorySemanticsAttr") { + attr.getAttrDefName() == "SPV_MemorySemanticsAttr" || + attr.getAttrDefName() == "SPV_MatrixLayoutAttr") { // These two enums are encoded as to constant values in SPIR-V blob, // but we directly use the constant value as attribute in SPIR-V dialect. So // need to handle them separately from normal enum attributes. diff --git a/mlir/unittests/ExecutionEngine/CMakeLists.txt b/mlir/unittests/ExecutionEngine/CMakeLists.txt --- a/mlir/unittests/ExecutionEngine/CMakeLists.txt +++ b/mlir/unittests/ExecutionEngine/CMakeLists.txt @@ -1,4 +1,5 @@ add_mlir_unittest(MLIRExecutionEngineTests + DynamicMemRef.cpp Invoke.cpp ) get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) diff --git a/mlir/unittests/ExecutionEngine/DynamicMemRef.cpp b/mlir/unittests/ExecutionEngine/DynamicMemRef.cpp new file mode 100644 --- /dev/null +++ b/mlir/unittests/ExecutionEngine/DynamicMemRef.cpp @@ -0,0 +1,99 @@ +//===- DynamicMemRef.cpp ----------------------------------------*- C++ -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/ExecutionEngine/CRunnerUtils.h" +#include "llvm/ADT/SmallVector.h" + +#include "gmock/gmock.h" + +using namespace ::mlir; +using namespace ::testing; + +TEST(DynamicMemRef, rankZero) { + int data = 57; + + StridedMemRefType memRef; + memRef.basePtr = &data; + memRef.data = &data; + memRef.offset = 0; + + DynamicMemRefType dynamicMemRef(memRef); + + llvm::SmallVector values(dynamicMemRef.begin(), dynamicMemRef.end()); + EXPECT_THAT(values, ElementsAre(57)); +} + +TEST(DynamicMemRef, rankOne) { + std::array data; + + for (size_t i = 0; i < data.size(); ++i) { + data[i] = i; + } + + StridedMemRefType memRef; + memRef.basePtr = data.data(); + memRef.data = data.data(); + memRef.offset = 0; + memRef.sizes[0] = 3; + memRef.strides[0] = 1; + + DynamicMemRefType dynamicMemRef(memRef); + + llvm::SmallVector values(dynamicMemRef.begin(), dynamicMemRef.end()); + EXPECT_THAT(values, ElementsAreArray(data)); + + for (int64_t i = 0; i < 3; ++i) { + EXPECT_EQ(*dynamicMemRef[i], data[i]); + } +} + +TEST(DynamicMemRef, rankTwo) { + std::array data; + + for (size_t i = 0; i < data.size(); ++i) { + data[i] = i; + } + + StridedMemRefType memRef; + memRef.basePtr = data.data(); + memRef.data = data.data(); + memRef.offset = 0; + memRef.sizes[0] = 2; + memRef.sizes[1] = 3; + memRef.strides[0] = 3; + memRef.strides[1] = 1; + + DynamicMemRefType dynamicMemRef(memRef); + + llvm::SmallVector values(dynamicMemRef.begin(), dynamicMemRef.end()); + EXPECT_THAT(values, ElementsAreArray(data)); +} + +TEST(DynamicMemRef, rankThree) { + std::array data; + + for (size_t i = 0; i < data.size(); ++i) { + data[i] = i; + } + + StridedMemRefType memRef; + memRef.basePtr = data.data(); + memRef.data = data.data(); + memRef.offset = 0; + memRef.sizes[0] = 2; + memRef.sizes[1] = 3; + memRef.sizes[2] = 4; + memRef.strides[0] = 12; + 
memRef.strides[1] = 4; + memRef.strides[2] = 1; + + DynamicMemRefType dynamicMemRef(memRef); + + llvm::SmallVector values(dynamicMemRef.begin(), dynamicMemRef.end()); + EXPECT_THAT(values, ElementsAreArray(data)); +} \ No newline at end of file diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -580,7 +580,7 @@ int32_t DeviceTy::runRegion(void *TgtEntryPtr, void **TgtVarsPtr, ptrdiff_t *TgtOffsets, int32_t TgtVarsSize, AsyncInfoTy &AsyncInfo) { - if (!RTL->run_region || !RTL->synchronize) + if (!RTL->run_region_async || !RTL->synchronize) return RTL->run_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets, TgtVarsSize); return RTL->run_region_async(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets, diff --git a/third-party/benchmark/src/sysinfo.cc b/third-party/benchmark/src/sysinfo.cc --- a/third-party/benchmark/src/sysinfo.cc +++ b/third-party/benchmark/src/sysinfo.cc @@ -12,6 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. +#if defined(_MSC_VER) +// FIXME: This must be defined before any other includes to disable deprecation +// warnings for use of codecvt from C++17. We should remove our reliance on +// the deprecated functionality instead. +#define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING +#endif + #include "internal_macros.h" #ifdef BENCHMARK_OS_WINDOWS