diff --git a/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.h b/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.h --- a/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.h @@ -42,6 +42,7 @@ const bool CheckFunctionCalls; const std::string RawAssertList; SmallVector AssertMacros; + const std::vector<StringRef> IgnoredFunctions; }; } // namespace bugprone diff --git a/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.cpp --- a/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.cpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #include "AssertSideEffectCheck.h" +#include "../utils/Matchers.h" +#include "../utils/OptionsUtils.h" #include "clang/AST/ASTContext.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/Frontend/CompilerInstance.h" @@ -25,7 +27,9 @@ namespace { -AST_MATCHER_P(Expr, hasSideEffect, bool, CheckFunctionCalls) { +AST_MATCHER_P2(Expr, hasSideEffect, bool, CheckFunctionCalls, + clang::ast_matchers::internal::Matcher<NamedDecl>, + IgnoredFunctionsMatcher) { const Expr *E = &Node; if (const auto *Op = dyn_cast<UnaryOperator>(E)) { @@ -55,7 +59,8 @@ bool Result = CheckFunctionCalls; if (const auto *FuncDecl = CExpr->getDirectCallee()) { if (FuncDecl->getDeclName().isIdentifier() && - FuncDecl->getName() == "__builtin_expect") // exceptions come here + IgnoredFunctionsMatcher.matches(*FuncDecl, Finder, + Builder)) // exceptions come here Result = false; else if (const auto *MethodDecl = dyn_cast<const CXXMethodDecl>(FuncDecl)) Result &= !MethodDecl->isConst(); @@ -72,8 +77,9 @@ ClangTidyContext *Context) : ClangTidyCheck(Name, Context), CheckFunctionCalls(Options.get("CheckFunctionCalls", false)), - RawAssertList(Options.get("AssertMacros", - "assert,NSAssert,NSCAssert")) { + 
RawAssertList(Options.get("AssertMacros", "assert,NSAssert,NSCAssert")), + IgnoredFunctions(utils::options::parseStringList( + "__builtin_expect;" + Options.get("IgnoredFunctions", ""))) { StringRef(RawAssertList).split(AssertMacros, ",", -1, false); } @@ -81,11 +87,17 @@ void AssertSideEffectCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { Options.store(Opts, "CheckFunctionCalls", CheckFunctionCalls); Options.store(Opts, "AssertMacros", RawAssertList); + Options.store(Opts, "IgnoredFunctions", + utils::options::serializeStringList(IgnoredFunctions)); } void AssertSideEffectCheck::registerMatchers(MatchFinder *Finder) { + auto IgnoredFunctionsMatcher = + matchers::matchesAnyListedName(IgnoredFunctions); + auto DescendantWithSideEffect = - traverse(TK_AsIs, hasDescendant(expr(hasSideEffect(CheckFunctionCalls)))); + traverse(TK_AsIs, hasDescendant(expr(hasSideEffect( + CheckFunctionCalls, IgnoredFunctionsMatcher)))); auto ConditionWithSideEffect = hasCondition(DescendantWithSideEffect); Finder->addMatcher( stmt( diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -133,7 +133,7 @@ - New :doc:`readability-duplicate-include ` check. - Looks for duplicate includes and removes them. + Looks for duplicate includes and removes them. - New :doc:`readability-identifier-length ` check. @@ -167,7 +167,13 @@ Changes in existing checks ^^^^^^^^^^^^^^^^^^^^^^^^^^ +- :doc:`bugprone-assert-side-effect ` + check now supports an ``IgnoredFunctions`` option to explicitly consider + the specified semicolon-separated functions list as not having any + side-effects. Regular expressions for the list items are also accepted. + - Removed default setting ``cppcoreguidelines-explicit-virtual-functions.IgnoreDestructors = "true"``, + from :doc:`cppcoreguidelines-explicit-virtual-functions ` to match the current state of the C++ Core Guidelines. 
- Removed suggestion ``use gsl::at`` from warning message in the @@ -185,10 +191,10 @@ - Fixed a false positive in :doc:`bugprone-throw-keyword-missing ` when creating an exception object - using placement new + using placement new. - :doc:`cppcoreguidelines-narrowing-conversions ` - check now supports a `WarnOnIntegerToFloatingPointNarrowingConversion` + check now supports a ``WarnOnIntegerToFloatingPointNarrowingConversion`` option to control whether to warn on narrowing integer to floating-point conversions. diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone-assert-side-effect.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone-assert-side-effect.rst --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone-assert-side-effect.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone-assert-side-effect.rst @@ -21,3 +21,13 @@ Whether to treat non-const member and non-member functions as they produce side effects. Disabled by default because it can increase the number of false positive warnings. + +.. option:: IgnoredFunctions + + A semicolon-separated list of the names of functions or methods to be + considered as not having side-effects. Regular expressions are accepted, + e.g. `[Rr]ef(erence)?$` matches every function with suffix `Ref`, `ref`, + `Reference` and `reference`. The default is empty. If a name in the list + contains the sequence `::` it is matched against the qualified name + (i.e. `namespace::functionName`), otherwise it is matched against only + the function name (i.e. `functionName`). 
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-assert-side-effect.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-assert-side-effect.cpp --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-assert-side-effect.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-assert-side-effect.cpp @@ -1,4 +1,4 @@ -// RUN: %check_clang_tidy %s bugprone-assert-side-effect %t -- -config="{CheckOptions: [{key: bugprone-assert-side-effect.CheckFunctionCalls, value: true}, {key: bugprone-assert-side-effect.AssertMacros, value: 'assert,assert2,my_assert,convoluted_assert,msvc_assert'}]}" -- -fexceptions +// RUN: %check_clang_tidy %s bugprone-assert-side-effect %t -- -config="{CheckOptions: [{key: bugprone-assert-side-effect.CheckFunctionCalls, value: true}, {key: bugprone-assert-side-effect.AssertMacros, value: 'assert,assert2,my_assert,convoluted_assert,msvc_assert'}, {key: bugprone-assert-side-effect.IgnoredFunctions, value: 'MyClass::badButIgnoredFunc'}]}" -- -fexceptions //===--- assert definition block ------------------------------------------===// int abort() { return 0; } @@ -43,9 +43,12 @@ //===----------------------------------------------------------------------===// +bool badButIgnoredFunc(int a, int b) { return a * b > 0; } + class MyClass { public: bool badFunc(int a, int b) { return a * b > 0; } + bool badButIgnoredFunc(int a, int b) { return a * b > 0; } bool goodFunc(int a, int b) const { return a * b > 0; } MyClass &operator=(const MyClass &rhs) { return *this; } @@ -57,6 +60,11 @@ void operator delete(void *p) {} }; +class SomeoneElseClass { +public: + bool badButIgnoredFunc(int a, int b) { return a * b > 0; } +}; + bool freeFunction() { return true; } @@ -85,8 +93,16 @@ // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: side effect in assert() condition discarded in release builds MyClass mc; + SomeoneElseClass sec; assert(mc.badFunc(0, 1)); // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: side effect in assert() condition discarded in 
release builds + assert(mc.badButIgnoredFunc(0, 1)); + // badButIgnoredFunc is not ignored as only class members are ignored by the config + assert(badButIgnoredFunc(0, 1)); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: side effect in assert() condition discarded in release builds + // sec.badButIgnoredFunc is not ignored as only MyClass members are ignored by the config + assert(sec.badButIgnoredFunc(0, 1)); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: side effect in assert() condition discarded in release builds assert(mc.goodFunc(0, 1)); MyClass mc2; diff --git a/clang/cmake/caches/Fuchsia-stage2.cmake b/clang/cmake/caches/Fuchsia-stage2.cmake --- a/clang/cmake/caches/Fuchsia-stage2.cmake +++ b/clang/cmake/caches/Fuchsia-stage2.cmake @@ -134,6 +134,8 @@ set(RUNTIMES_${target}_LLVM_ENABLE_ASSERTIONS OFF CACHE BOOL "") set(RUNTIMES_${target}_SANITIZER_CXX_ABI "libc++" CACHE STRING "") set(RUNTIMES_${target}_SANITIZER_CXX_ABI_INTREE ON CACHE BOOL "") + set(RUNTIMES_${target}_SANITIZER_TEST_CXX "libc++" CACHE STRING "") + set(RUNTIMES_${target}_SANITIZER_TEST_CXX_INTREE ON CACHE BOOL "") set(RUNTIMES_${target}_COMPILER_RT_TEST_COMPILER_CFLAGS "--unwindlib=libunwind -static-libgcc" CACHE STRING "") set(RUNTIMES_${target}_SANITIZER_COMMON_TEST_TARGET_CFLAGS "--unwindlib=libunwind -static-libgcc" CACHE STRING "") set(RUNTIMES_${target}_TSAN_TEST_TARGET_CFLAGS "--unwindlib=libunwind -static-libgcc" CACHE STRING "") diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -256,9 +256,9 @@ ---------------------- - The default DWARF version has increased from DWARFv4 to DWARFv5. You can opt - back in to the old behavior with -gdwarf-4. Some platforms (Darwin, Android, - and SCE for instance) already opt out of this version bump as is suitable for - the platform + back in to the old behavior with ``-gdwarf-4`` or ``-fdebug-default-version=4``. 
+ Some platforms (Darwin, Android, and SCE for instance) already opt out of this + version bump as is suitable for the platform Arm and AArch64 Support in Clang -------------------------------- diff --git a/clang/include/clang/Basic/riscv_vector.td b/clang/include/clang/Basic/riscv_vector.td --- a/clang/include/clang/Basic/riscv_vector.td +++ b/clang/include/clang/Basic/riscv_vector.td @@ -1668,11 +1668,13 @@ // 12.10. Vector Single-Width Integer Multiply Instructions defm vmul : RVVIntBinBuiltinSet; +let RequiredFeatures = ["FullMultiply"] in { defm vmulh : RVVSignedBinBuiltinSet; defm vmulhu : RVVUnsignedBinBuiltinSet; defm vmulhsu : RVVOutOp1BuiltinSet<"vmulhsu", "csil", [["vv", "v", "vvUv"], ["vx", "v", "vvUe"]]>; +} // 12.11. Vector Integer Divide Instructions defm vdivu : RVVUnsignedBinBuiltinSet; @@ -1759,7 +1761,9 @@ defm vasub : RVVSignedBinBuiltinSet; // 13.3. Vector Single-Width Fractional Multiply with Rounding and Saturation +let RequiredFeatures = ["FullMultiply"] in { defm vsmul : RVVSignedBinBuiltinSet; +} // 13.4. Vector Single-Width Scaling Shift Instructions defm vssrl : RVVUnsignedShiftBuiltinSet; diff --git a/clang/include/clang/Tooling/Transformer/SourceCodeBuilders.h b/clang/include/clang/Tooling/Transformer/SourceCodeBuilders.h --- a/clang/include/clang/Tooling/Transformer/SourceCodeBuilders.h +++ b/clang/include/clang/Tooling/Transformer/SourceCodeBuilders.h @@ -43,6 +43,15 @@ /// Determines whether printing this expression to the right of a unary operator /// requires a parentheses to preserve its meaning. bool needParensAfterUnaryOperator(const Expr &E); + +// Recognizes known types (and sugared versions thereof) that overload the `*` +// and `->` operator. Below is the list of currently included types, but it is +// subject to change: +// +// * std::unique_ptr, std::shared_ptr, std::weak_ptr, +// * std::optional, absl::optional, llvm::Optional, +// * absl::StatusOr, llvm::Expected. 
+bool isKnownPointerLikeType(QualType Ty, ASTContext &Context); /// @} /// \name Basic code-string generation utilities. @@ -69,6 +78,8 @@ /// `x` becomes `x.` /// `*a` becomes `a->` /// `a+b` becomes `(a+b).` +/// +/// DEPRECATED. Use `buildAccess`. llvm::Optional buildDot(const Expr &E, const ASTContext &Context); /// Adds an arrow to the end of the given expression, but adds parentheses @@ -77,8 +88,32 @@ /// `x` becomes `x->` /// `&a` becomes `a.` /// `a+b` becomes `(a+b)->` +/// +/// DEPRECATED. Use `buildAccess`. llvm::Optional buildArrow(const Expr &E, const ASTContext &Context); + +/// Specifies how to classify pointer-like types -- like values or like pointers +/// -- with regard to generating member-access syntax. +enum class PLTClass : bool { + Value, + Pointer, +}; + +/// Adds an appropriate access operator (`.`, `->` or nothing, in the case of +/// implicit `this`) to the end of the given expression. Adds parentheses when +/// needed by the syntax and simplifies when possible. If `PLTypeClass` is +/// `Pointer`, for known pointer-like types (see `isKnownPointerLikeType`), +/// treats `operator->` and `operator*` like the built-in `->` and `*` +/// operators. 
+/// +/// `x` becomes `x->` or `x.`, depending on `E`'s type +/// `a+b` becomes `(a+b)->` or `(a+b).`, depending on `E`'s type +/// `&a` becomes `a.` +/// `*a` becomes `a->` +llvm::Optional +buildAccess(const Expr &E, ASTContext &Context, + PLTClass Classification = PLTClass::Pointer); /// @} } // namespace tooling diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -17190,7 +17190,7 @@ case NVPTX::BI__mma_tf32_m16n16k8_ld_a: return MMA_LDST(4, m16n16k8_load_a_tf32); case NVPTX::BI__mma_tf32_m16n16k8_ld_b: - return MMA_LDST(2, m16n16k8_load_b_tf32); + return MMA_LDST(4, m16n16k8_load_b_tf32); case NVPTX::BI__mma_tf32_m16n16k8_ld_c: return MMA_LDST(8, m16n16k8_load_c_f32); diff --git a/clang/lib/Tooling/Transformer/SourceCodeBuilders.cpp b/clang/lib/Tooling/Transformer/SourceCodeBuilders.cpp --- a/clang/lib/Tooling/Transformer/SourceCodeBuilders.cpp +++ b/clang/lib/Tooling/Transformer/SourceCodeBuilders.cpp @@ -10,6 +10,8 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" #include "clang/Tooling/Transformer/SourceCode.h" #include "llvm/ADT/Twine.h" #include @@ -60,6 +62,16 @@ return false; } +bool tooling::isKnownPointerLikeType(QualType Ty, ASTContext &Context) { + using namespace ast_matchers; + const auto PointerLikeTy = type(hasUnqualifiedDesugaredType( + recordType(hasDeclaration(cxxRecordDecl(hasAnyName( + "::std::unique_ptr", "::std::shared_ptr", "::std::weak_ptr", + "::std::optional", "::absl::optional", "::llvm::Optional", + "absl::StatusOr", "::llvm::Expected")))))); + return match(PointerLikeTy, Ty, Context).size() > 0; +} + llvm::Optional tooling::buildParens(const Expr &E, const ASTContext &Context) { StringRef Text = getText(E, Context); @@ -114,8 +126,10 @@ return ("&" + Text).str(); } -llvm::Optional 
tooling::buildDot(const Expr &E, - const ASTContext &Context) { +// Append the appropriate access operation (syntactically) to `E`, assuming `E` +// is a non-pointer value. +static llvm::Optional +buildAccessForValue(const Expr &E, const ASTContext &Context) { if (const auto *Op = llvm::dyn_cast(&E)) if (Op->getOpcode() == UO_Deref) { // Strip leading '*', add following '->'. @@ -138,8 +152,10 @@ return (Text + ".").str(); } -llvm::Optional tooling::buildArrow(const Expr &E, - const ASTContext &Context) { +// Append the appropriate access operation (syntactically) to `E`, assuming `E` +// is a pointer value. +static llvm::Optional +buildAccessForPointer(const Expr &E, const ASTContext &Context) { if (const auto *Op = llvm::dyn_cast(&E)) if (Op->getOpcode() == UO_AddrOf) { // Strip leading '&', add following '.'. @@ -160,3 +176,62 @@ return ("(" + Text + ")->").str(); return (Text + "->").str(); } + +llvm::Optional tooling::buildDot(const Expr &E, + const ASTContext &Context) { + return buildAccessForValue(E, Context); +} + +llvm::Optional tooling::buildArrow(const Expr &E, + const ASTContext &Context) { + return buildAccessForPointer(E, Context); +} + +// If `E` is an overloaded-operator call of kind `K` on an object `O`, returns +// `O`. Otherwise, returns `nullptr`. +static const Expr *maybeGetOperatorObjectArg(const Expr &E, + OverloadedOperatorKind K) { + if (const auto *OpCall = dyn_cast(&E)) { + if (OpCall->getOperator() == K && OpCall->getNumArgs() == 1) + return OpCall->getArg(0); + } + return nullptr; +} + +static bool treatLikePointer(QualType Ty, PLTClass C, ASTContext &Context) { + switch (C) { + case PLTClass::Value: + return false; + case PLTClass::Pointer: + return isKnownPointerLikeType(Ty, Context); + } +} + +// FIXME: move over the other `maybe` functionality from Stencil. Should all be +// in one place. 
+llvm::Optional tooling::buildAccess(const Expr &RawExpression, + ASTContext &Context, + PLTClass Classification) { + if (RawExpression.isImplicitCXXThis()) + // Return the empty string, because `None` signifies some sort of failure. + return std::string(); + + const Expr *E = RawExpression.IgnoreImplicitAsWritten(); + + if (E->getType()->isAnyPointerType() || + treatLikePointer(E->getType(), Classification, Context)) { + // Strip off operator-> calls. They can only occur inside an actual arrow + // member access, so we treat them as equivalent to an actual object + // expression. + if (const auto *Obj = maybeGetOperatorObjectArg(*E, clang::OO_Arrow)) + E = Obj; + return buildAccessForPointer(*E, Context); + } + + if (const auto *Obj = maybeGetOperatorObjectArg(*E, clang::OO_Star)) { + if (treatLikePointer(Obj->getType(), Classification, Context)) + return buildAccessForPointer(*Obj, Context); + }; + + return buildAccessForValue(*E, Context); +} diff --git a/clang/lib/Tooling/Transformer/Stencil.cpp b/clang/lib/Tooling/Transformer/Stencil.cpp --- a/clang/lib/Tooling/Transformer/Stencil.cpp +++ b/clang/lib/Tooling/Transformer/Stencil.cpp @@ -11,7 +11,6 @@ #include "clang/AST/ASTTypeTraits.h" #include "clang/AST/Expr.h" #include "clang/ASTMatchers/ASTMatchFinder.h" -#include "clang/ASTMatchers/ASTMatchers.h" #include "clang/Basic/SourceLocation.h" #include "clang/Lex/Lexer.h" #include "clang/Tooling/Transformer/SourceCode.h" @@ -56,39 +55,6 @@ return Error::success(); } -// FIXME: Consider memoizing this function using the `ASTContext`. -static bool isSmartPointerType(QualType Ty, ASTContext &Context) { - using namespace ::clang::ast_matchers; - - // Optimization: hard-code common smart-pointer types. This can/should be - // removed if we start caching the results of this function. 
- auto KnownSmartPointer = - cxxRecordDecl(hasAnyName("::std::unique_ptr", "::std::shared_ptr")); - const auto QuacksLikeASmartPointer = cxxRecordDecl( - hasMethod(cxxMethodDecl(hasOverloadedOperatorName("->"), - returns(qualType(pointsTo(type()))))), - hasMethod(cxxMethodDecl(hasOverloadedOperatorName("*"), - returns(qualType(references(type())))))); - const auto SmartPointer = qualType(hasDeclaration( - cxxRecordDecl(anyOf(KnownSmartPointer, QuacksLikeASmartPointer)))); - return match(SmartPointer, Ty, Context).size() > 0; -} - -// Identifies use of `operator*` on smart pointers, and returns the underlying -// smart-pointer expression; otherwise, returns null. -static const Expr *isSmartDereference(const Expr &E, ASTContext &Context) { - using namespace ::clang::ast_matchers; - - const auto HasOverloadedArrow = cxxRecordDecl(hasMethod(cxxMethodDecl( - hasOverloadedOperatorName("->"), returns(qualType(pointsTo(type())))))); - // Verify it is a smart pointer by finding `operator->` in the class - // declaration. - auto Deref = cxxOperatorCallExpr( - hasOverloadedOperatorName("*"), hasUnaryOperand(expr().bind("arg")), - callee(cxxMethodDecl(ofClass(HasOverloadedArrow)))); - return selectFirst("arg", match(Deref, E, Context)); -} - namespace { // An arbitrary fragment of code within a stencil. class RawTextStencil : public StencilInterface { @@ -196,7 +162,7 @@ break; case UnaryNodeOperator::MaybeDeref: if (E->getType()->isAnyPointerType() || - isSmartPointerType(E->getType(), *Match.Context)) { + tooling::isKnownPointerLikeType(E->getType(), *Match.Context)) { // Strip off any operator->. This can only occur inside an actual arrow // member access, so we treat it as equivalent to an actual object // expression. @@ -216,7 +182,7 @@ break; case UnaryNodeOperator::MaybeAddressOf: if (E->getType()->isAnyPointerType() || - isSmartPointerType(E->getType(), *Match.Context)) { + tooling::isKnownPointerLikeType(E->getType(), *Match.Context)) { // Strip off any operator->. 
This can only occur inside an actual arrow // member access, so we treat it as equivalent to an actual object // expression. @@ -311,34 +277,12 @@ if (E == nullptr) return llvm::make_error(errc::invalid_argument, "Id not bound: " + BaseId); - if (!E->isImplicitCXXThis()) { - llvm::Optional S; - if (E->getType()->isAnyPointerType() || - isSmartPointerType(E->getType(), *Match.Context)) { - // Strip off any operator->. This can only occur inside an actual arrow - // member access, so we treat it as equivalent to an actual object - // expression. - if (const auto *OpCall = dyn_cast(E)) { - if (OpCall->getOperator() == clang::OO_Arrow && - OpCall->getNumArgs() == 1) { - E = OpCall->getArg(0); - } - } - S = tooling::buildArrow(*E, *Match.Context); - } else if (const auto *Operand = isSmartDereference(*E, *Match.Context)) { - // `buildDot` already handles the built-in dereference operator, so we - // only need to catch overloaded `operator*`. - S = tooling::buildArrow(*Operand, *Match.Context); - } else { - S = tooling::buildDot(*E, *Match.Context); - } - if (S.hasValue()) - *Result += *S; - else - return llvm::make_error( - errc::invalid_argument, - "Could not construct object text from ID: " + BaseId); - } + llvm::Optional S = tooling::buildAccess(*E, *Match.Context); + if (!S.hasValue()) + return llvm::make_error( + errc::invalid_argument, + "Could not construct object text from ID: " + BaseId); + *Result += *S; return Member->eval(Match, Result); } }; diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmul-eew64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmul-eew64.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmul-eew64.c @@ -0,0 +1,440 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck 
--check-prefix=CHECK-RV64 %s +// NOTE: This test file contains eew=64 of vmulh, vmulhu, vmulhsu. +// NOTE: The purpose of separating these 3 instructions from vmul.c is that +// eew=64 versions only enable when V extension is specified. (Not for zve) + +#include + +// CHECK-RV64-LABEL: @test_vmulh_vv_i64m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.nxv1i64.nxv1i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m1_t test_vmulh_vv_i64m1(vint64m1_t op1, vint64m1_t op2, size_t vl) { + return vmulh(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulh_vx_i64m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.nxv1i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m1_t test_vmulh_vx_i64m1(vint64m1_t op1, int64_t op2, size_t vl) { + return vmulh(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulh_vv_i64m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.nxv2i64.nxv2i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m2_t test_vmulh_vv_i64m2(vint64m2_t op1, vint64m2_t op2, size_t vl) { + return vmulh(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulh_vx_i64m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.nxv2i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m2_t test_vmulh_vx_i64m2(vint64m2_t op1, int64_t op2, size_t vl) { + return vmulh(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulh_vv_i64m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.nxv4i64.nxv4i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m4_t test_vmulh_vv_i64m4(vint64m4_t op1, vint64m4_t op2, size_t vl) { + return vmulh(op1, op2, vl); +} + +// 
CHECK-RV64-LABEL: @test_vmulh_vx_i64m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.nxv4i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m4_t test_vmulh_vx_i64m4(vint64m4_t op1, int64_t op2, size_t vl) { + return vmulh(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulh_vv_i64m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.nxv8i64.nxv8i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m8_t test_vmulh_vv_i64m8(vint64m8_t op1, vint64m8_t op2, size_t vl) { + return vmulh(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulh_vx_i64m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.nxv8i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m8_t test_vmulh_vx_i64m8(vint64m8_t op1, int64_t op2, size_t vl) { + return vmulh(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.nxv1i64.nxv1i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m1_t test_vmulhu_vv_u64m1(vuint64m1_t op1, vuint64m1_t op2, size_t vl) { + return vmulhu(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.nxv1i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m1_t test_vmulhu_vx_u64m1(vuint64m1_t op1, uint64_t op2, size_t vl) { + return vmulhu(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.nxv2i64.nxv2i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m2_t 
test_vmulhu_vv_u64m2(vuint64m2_t op1, vuint64m2_t op2, size_t vl) { + return vmulhu(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.nxv2i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m2_t test_vmulhu_vx_u64m2(vuint64m2_t op1, uint64_t op2, size_t vl) { + return vmulhu(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.nxv4i64.nxv4i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m4_t test_vmulhu_vv_u64m4(vuint64m4_t op1, vuint64m4_t op2, size_t vl) { + return vmulhu(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.nxv4i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m4_t test_vmulhu_vx_u64m4(vuint64m4_t op1, uint64_t op2, size_t vl) { + return vmulhu(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.nxv8i64.nxv8i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m8_t test_vmulhu_vv_u64m8(vuint64m8_t op1, vuint64m8_t op2, size_t vl) { + return vmulhu(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.nxv8i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m8_t test_vmulhu_vx_u64m8(vuint64m8_t op1, uint64_t op2, size_t vl) { + return vmulhu(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
@llvm.riscv.vmulhsu.nxv1i64.nxv1i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m1_t test_vmulhsu_vv_i64m1(vint64m1_t op1, vuint64m1_t op2, size_t vl) { + return vmulhsu(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.nxv1i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m1_t test_vmulhsu_vx_i64m1(vint64m1_t op1, uint64_t op2, size_t vl) { + return vmulhsu(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.nxv2i64.nxv2i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m2_t test_vmulhsu_vv_i64m2(vint64m2_t op1, vuint64m2_t op2, size_t vl) { + return vmulhsu(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.nxv2i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m2_t test_vmulhsu_vx_i64m2(vint64m2_t op1, uint64_t op2, size_t vl) { + return vmulhsu(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.nxv4i64.nxv4i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m4_t test_vmulhsu_vv_i64m4(vint64m4_t op1, vuint64m4_t op2, size_t vl) { + return vmulhsu(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.nxv4i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m4_t test_vmulhsu_vx_i64m4(vint64m4_t op1, uint64_t op2, size_t vl) { + return vmulhsu(op1, op2, vl); +} + +// 
CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.nxv8i64.nxv8i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m8_t test_vmulhsu_vv_i64m8(vint64m8_t op1, vuint64m8_t op2, size_t vl) { + return vmulhsu(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.nxv8i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m8_t test_vmulhsu_vx_i64m8(vint64m8_t op1, uint64_t op2, size_t vl) { + return vmulhsu(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulh_vv_i64m1_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.mask.nxv1i64.nxv1i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m1_t test_vmulh_vv_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, vint64m1_t op2, size_t vl) { + return vmulh(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulh_vx_i64m1_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.mask.nxv1i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m1_t test_vmulh_vx_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, int64_t op2, size_t vl) { + return vmulh(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulh_vv_i64m2_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.mask.nxv2i64.nxv2i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m2_t test_vmulh_vv_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, vint64m2_t op2, size_t vl) { + return 
vmulh(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulh_vx_i64m2_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.mask.nxv2i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m2_t test_vmulh_vx_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, int64_t op2, size_t vl) { + return vmulh(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulh_vv_i64m4_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.mask.nxv4i64.nxv4i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m4_t test_vmulh_vv_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, vint64m4_t op2, size_t vl) { + return vmulh(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulh_vx_i64m4_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.mask.nxv4i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m4_t test_vmulh_vx_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, int64_t op2, size_t vl) { + return vmulh(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulh_vv_i64m8_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.mask.nxv8i64.nxv8i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m8_t test_vmulh_vv_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, vint64m8_t op2, size_t vl) { + return vmulh(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulh_vx_i64m8_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.mask.nxv8i64.i64.i64( 
[[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m8_t test_vmulh_vx_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, int64_t op2, size_t vl) { + return vmulh(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m1_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.mask.nxv1i64.nxv1i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m1_t test_vmulhu_vv_u64m1_m(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t op1, vuint64m1_t op2, size_t vl) { + return vmulhu(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m1_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.mask.nxv1i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m1_t test_vmulhu_vx_u64m1_m(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t op1, uint64_t op2, size_t vl) { + return vmulhu(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m2_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.mask.nxv2i64.nxv2i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m2_t test_vmulhu_vv_u64m2_m(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t op1, vuint64m2_t op2, size_t vl) { + return vmulhu(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m2_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.mask.nxv2i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m2_t test_vmulhu_vx_u64m2_m(vbool32_t mask, vuint64m2_t 
maskedoff, vuint64m2_t op1, uint64_t op2, size_t vl) { + return vmulhu(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m4_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.mask.nxv4i64.nxv4i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m4_t test_vmulhu_vv_u64m4_m(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t op1, vuint64m4_t op2, size_t vl) { + return vmulhu(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m4_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.mask.nxv4i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m4_t test_vmulhu_vx_u64m4_m(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t op1, uint64_t op2, size_t vl) { + return vmulhu(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m8_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.mask.nxv8i64.nxv8i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m8_t test_vmulhu_vv_u64m8_m(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t op1, vuint64m8_t op2, size_t vl) { + return vmulhu(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m8_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.mask.nxv8i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m8_t test_vmulhu_vx_u64m8_m(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t op1, uint64_t op2, size_t vl) { + return vmulhu(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m1_m( +// CHECK-RV64-NEXT: entry: 
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.mask.nxv1i64.nxv1i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m1_t test_vmulhsu_vv_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, vuint64m1_t op2, size_t vl) { + return vmulhsu(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m1_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.mask.nxv1i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m1_t test_vmulhsu_vx_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, uint64_t op2, size_t vl) { + return vmulhsu(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m2_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.mask.nxv2i64.nxv2i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m2_t test_vmulhsu_vv_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, vuint64m2_t op2, size_t vl) { + return vmulhsu(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m2_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.mask.nxv2i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m2_t test_vmulhsu_vx_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, uint64_t op2, size_t vl) { + return vmulhsu(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m4_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.mask.nxv4i64.nxv4i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// 
CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m4_t test_vmulhsu_vv_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, vuint64m4_t op2, size_t vl) { + return vmulhsu(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m4_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.mask.nxv4i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m4_t test_vmulhsu_vx_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, uint64_t op2, size_t vl) { + return vmulhsu(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m8_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.mask.nxv8i64.nxv8i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m8_t test_vmulhsu_vv_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, vuint64m8_t op2, size_t vl) { + return vmulhsu(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m8_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.mask.nxv8i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m8_t test_vmulhsu_vx_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, uint64_t op2, size_t vl) { + return vmulhsu(mask, maskedoff, op1, op2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmul.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmul.c --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmul.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmul.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 
-target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include @@ -1120,78 +1120,6 @@ return vmulh(op1, op2, vl); } -// CHECK-RV64-LABEL: @test_vmulh_vv_i64m1( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.nxv1i64.nxv1i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m1_t test_vmulh_vv_i64m1(vint64m1_t op1, vint64m1_t op2, size_t vl) { - return vmulh(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulh_vx_i64m1( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.nxv1i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m1_t test_vmulh_vx_i64m1(vint64m1_t op1, int64_t op2, size_t vl) { - return vmulh(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulh_vv_i64m2( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.nxv2i64.nxv2i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m2_t test_vmulh_vv_i64m2(vint64m2_t op1, vint64m2_t op2, size_t vl) { - return vmulh(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulh_vx_i64m2( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.nxv2i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m2_t test_vmulh_vx_i64m2(vint64m2_t op1, int64_t op2, size_t vl) { - return vmulh(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulh_vv_i64m4( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.nxv4i64.nxv4i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m4_t test_vmulh_vv_i64m4(vint64m4_t op1, vint64m4_t 
op2, size_t vl) { - return vmulh(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulh_vx_i64m4( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.nxv4i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m4_t test_vmulh_vx_i64m4(vint64m4_t op1, int64_t op2, size_t vl) { - return vmulh(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulh_vv_i64m8( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.nxv8i64.nxv8i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m8_t test_vmulh_vv_i64m8(vint64m8_t op1, vint64m8_t op2, size_t vl) { - return vmulh(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulh_vx_i64m8( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.nxv8i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m8_t test_vmulh_vx_i64m8(vint64m8_t op1, int64_t op2, size_t vl) { - return vmulh(op1, op2, vl); -} - // CHECK-RV64-LABEL: @test_vmulhu_vv_u8mf8( // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.nxv1i8.nxv1i8.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) @@ -1516,78 +1444,6 @@ return vmulhu(op1, op2, vl); } -// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m1( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.nxv1i64.nxv1i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vuint64m1_t test_vmulhu_vv_u64m1(vuint64m1_t op1, vuint64m1_t op2, size_t vl) { - return vmulhu(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m1( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.nxv1i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vuint64m1_t test_vmulhu_vx_u64m1(vuint64m1_t op1, uint64_t op2, size_t vl) { - return 
vmulhu(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m2( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.nxv2i64.nxv2i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vuint64m2_t test_vmulhu_vv_u64m2(vuint64m2_t op1, vuint64m2_t op2, size_t vl) { - return vmulhu(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m2( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.nxv2i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vuint64m2_t test_vmulhu_vx_u64m2(vuint64m2_t op1, uint64_t op2, size_t vl) { - return vmulhu(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m4( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.nxv4i64.nxv4i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vuint64m4_t test_vmulhu_vv_u64m4(vuint64m4_t op1, vuint64m4_t op2, size_t vl) { - return vmulhu(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m4( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.nxv4i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vuint64m4_t test_vmulhu_vx_u64m4(vuint64m4_t op1, uint64_t op2, size_t vl) { - return vmulhu(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m8( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.nxv8i64.nxv8i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vuint64m8_t test_vmulhu_vv_u64m8(vuint64m8_t op1, vuint64m8_t op2, size_t vl) { - return vmulhu(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m8( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.nxv8i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret 
[[TMP0]] -// -vuint64m8_t test_vmulhu_vx_u64m8(vuint64m8_t op1, uint64_t op2, size_t vl) { - return vmulhu(op1, op2, vl); -} - // CHECK-RV64-LABEL: @test_vmulhsu_vv_i8mf8( // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.nxv1i8.nxv1i8.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) @@ -1912,78 +1768,6 @@ return vmulhsu(op1, op2, vl); } -// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m1( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.nxv1i64.nxv1i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m1_t test_vmulhsu_vv_i64m1(vint64m1_t op1, vuint64m1_t op2, size_t vl) { - return vmulhsu(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m1( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.nxv1i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m1_t test_vmulhsu_vx_i64m1(vint64m1_t op1, uint64_t op2, size_t vl) { - return vmulhsu(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m2( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.nxv2i64.nxv2i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m2_t test_vmulhsu_vv_i64m2(vint64m2_t op1, vuint64m2_t op2, size_t vl) { - return vmulhsu(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m2( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.nxv2i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m2_t test_vmulhsu_vx_i64m2(vint64m2_t op1, uint64_t op2, size_t vl) { - return vmulhsu(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m4( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.nxv4i64.nxv4i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: 
ret [[TMP0]] -// -vint64m4_t test_vmulhsu_vv_i64m4(vint64m4_t op1, vuint64m4_t op2, size_t vl) { - return vmulhsu(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m4( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.nxv4i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m4_t test_vmulhsu_vx_i64m4(vint64m4_t op1, uint64_t op2, size_t vl) { - return vmulhsu(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m8( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.nxv8i64.nxv8i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m8_t test_vmulhsu_vv_i64m8(vint64m8_t op1, vuint64m8_t op2, size_t vl) { - return vmulhsu(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m8( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.nxv8i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m8_t test_vmulhsu_vx_i64m8(vint64m8_t op1, uint64_t op2, size_t vl) { - return vmulhsu(op1, op2, vl); -} - // CHECK-RV64-LABEL: @test_vmul_vv_i8mf8_m( // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmul.mask.nxv1i8.nxv1i8.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) @@ -3100,78 +2884,6 @@ return vmulh(mask, maskedoff, op1, op2, vl); } -// CHECK-RV64-LABEL: @test_vmulh_vv_i64m1_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.mask.nxv1i64.nxv1i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m1_t test_vmulh_vv_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, vint64m1_t op2, size_t vl) { - return vmulh(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulh_vx_i64m1_m( -// 
CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.mask.nxv1i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m1_t test_vmulh_vx_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, int64_t op2, size_t vl) { - return vmulh(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulh_vv_i64m2_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.mask.nxv2i64.nxv2i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m2_t test_vmulh_vv_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, vint64m2_t op2, size_t vl) { - return vmulh(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulh_vx_i64m2_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.mask.nxv2i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m2_t test_vmulh_vx_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, int64_t op2, size_t vl) { - return vmulh(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulh_vv_i64m4_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.mask.nxv4i64.nxv4i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m4_t test_vmulh_vv_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, vint64m4_t op2, size_t vl) { - return vmulh(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulh_vx_i64m4_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.mask.nxv4i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: 
ret [[TMP0]] -// -vint64m4_t test_vmulh_vx_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, int64_t op2, size_t vl) { - return vmulh(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulh_vv_i64m8_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.mask.nxv8i64.nxv8i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m8_t test_vmulh_vv_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, vint64m8_t op2, size_t vl) { - return vmulh(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulh_vx_i64m8_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.mask.nxv8i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m8_t test_vmulh_vx_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, int64_t op2, size_t vl) { - return vmulh(mask, maskedoff, op1, op2, vl); -} - // CHECK-RV64-LABEL: @test_vmulhu_vv_u8mf8_m( // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.mask.nxv1i8.nxv1i8.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) @@ -3496,78 +3208,6 @@ return vmulhu(mask, maskedoff, op1, op2, vl); } -// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m1_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.mask.nxv1i64.nxv1i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vuint64m1_t test_vmulhu_vv_u64m1_m(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t op1, vuint64m1_t op2, size_t vl) { - return vmulhu(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m1_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.mask.nxv1i64.i64.i64( 
[[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vuint64m1_t test_vmulhu_vx_u64m1_m(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t op1, uint64_t op2, size_t vl) { - return vmulhu(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m2_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.mask.nxv2i64.nxv2i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vuint64m2_t test_vmulhu_vv_u64m2_m(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t op1, vuint64m2_t op2, size_t vl) { - return vmulhu(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m2_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.mask.nxv2i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vuint64m2_t test_vmulhu_vx_u64m2_m(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t op1, uint64_t op2, size_t vl) { - return vmulhu(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m4_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.mask.nxv4i64.nxv4i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vuint64m4_t test_vmulhu_vv_u64m4_m(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t op1, vuint64m4_t op2, size_t vl) { - return vmulhu(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m4_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.mask.nxv4i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vuint64m4_t test_vmulhu_vx_u64m4_m(vbool16_t mask, 
vuint64m4_t maskedoff, vuint64m4_t op1, uint64_t op2, size_t vl) { - return vmulhu(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m8_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.mask.nxv8i64.nxv8i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vuint64m8_t test_vmulhu_vv_u64m8_m(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t op1, vuint64m8_t op2, size_t vl) { - return vmulhu(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m8_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.mask.nxv8i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vuint64m8_t test_vmulhu_vx_u64m8_m(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t op1, uint64_t op2, size_t vl) { - return vmulhu(mask, maskedoff, op1, op2, vl); -} - // CHECK-RV64-LABEL: @test_vmulhsu_vv_i8mf8_m( // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.mask.nxv1i8.nxv1i8.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) @@ -3891,75 +3531,3 @@ vint32m8_t test_vmulhsu_vx_i32m8_m(vbool4_t mask, vint32m8_t maskedoff, vint32m8_t op1, uint32_t op2, size_t vl) { return vmulhsu(mask, maskedoff, op1, op2, vl); } - -// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m1_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.mask.nxv1i64.nxv1i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m1_t test_vmulhsu_vv_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, vuint64m1_t op2, size_t vl) { - return vmulhsu(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m1_m( -// CHECK-RV64-NEXT: entry: -// 
CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.mask.nxv1i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m1_t test_vmulhsu_vx_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, uint64_t op2, size_t vl) { - return vmulhsu(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m2_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.mask.nxv2i64.nxv2i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m2_t test_vmulhsu_vv_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, vuint64m2_t op2, size_t vl) { - return vmulhsu(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m2_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.mask.nxv2i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m2_t test_vmulhsu_vx_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, uint64_t op2, size_t vl) { - return vmulhsu(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m4_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.mask.nxv4i64.nxv4i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m4_t test_vmulhsu_vv_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, vuint64m4_t op2, size_t vl) { - return vmulhsu(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m4_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.mask.nxv4i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// 
CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m4_t test_vmulhsu_vx_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, uint64_t op2, size_t vl) { - return vmulhsu(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m8_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.mask.nxv8i64.nxv8i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m8_t test_vmulhsu_vv_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, vuint64m8_t op2, size_t vl) { - return vmulhsu(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m8_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.mask.nxv8i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m8_t test_vmulhsu_vx_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, uint64_t op2, size_t vl) { - return vmulhsu(mask, maskedoff, op1, op2, vl); -} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsmul-eew64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsmul-eew64.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsmul-eew64.c @@ -0,0 +1,159 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// NOTE: The purpose of separating these 3 instructions from vsmul.c is that +// eew=64 versions only enable when V extension is specified. 
(Not for zve) + +#include + +// CHECK-RV64-LABEL: @test_vsmul_vv_i64m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.nxv1i64.nxv1i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m1_t test_vsmul_vv_i64m1(vint64m1_t op1, vint64m1_t op2, size_t vl) { + return vsmul(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vsmul_vx_i64m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.nxv1i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m1_t test_vsmul_vx_i64m1(vint64m1_t op1, int64_t op2, size_t vl) { + return vsmul(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vsmul_vv_i64m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.nxv2i64.nxv2i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m2_t test_vsmul_vv_i64m2(vint64m2_t op1, vint64m2_t op2, size_t vl) { + return vsmul(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vsmul_vx_i64m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.nxv2i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m2_t test_vsmul_vx_i64m2(vint64m2_t op1, int64_t op2, size_t vl) { + return vsmul(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vsmul_vv_i64m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.nxv4i64.nxv4i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m4_t test_vsmul_vv_i64m4(vint64m4_t op1, vint64m4_t op2, size_t vl) { + return vsmul(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vsmul_vx_i64m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.nxv4i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m4_t 
test_vsmul_vx_i64m4(vint64m4_t op1, int64_t op2, size_t vl) { + return vsmul(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vsmul_vv_i64m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.nxv8i64.nxv8i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m8_t test_vsmul_vv_i64m8(vint64m8_t op1, vint64m8_t op2, size_t vl) { + return vsmul(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vsmul_vx_i64m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.nxv8i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m8_t test_vsmul_vx_i64m8(vint64m8_t op1, int64_t op2, size_t vl) { + return vsmul(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vsmul_vv_i64m1_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.mask.nxv1i64.nxv1i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m1_t test_vsmul_vv_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, + vint64m1_t op1, vint64m1_t op2, size_t vl) { + return vsmul(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vsmul_vx_i64m1_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.mask.nxv1i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m1_t test_vsmul_vx_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, + vint64m1_t op1, int64_t op2, size_t vl) { + return vsmul(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vsmul_vv_i64m2_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.mask.nxv2i64.nxv2i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m2_t 
test_vsmul_vv_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, + vint64m2_t op1, vint64m2_t op2, size_t vl) { + return vsmul(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vsmul_vx_i64m2_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.mask.nxv2i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m2_t test_vsmul_vx_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, + vint64m2_t op1, int64_t op2, size_t vl) { + return vsmul(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vsmul_vv_i64m4_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.mask.nxv4i64.nxv4i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m4_t test_vsmul_vv_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, + vint64m4_t op1, vint64m4_t op2, size_t vl) { + return vsmul(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vsmul_vx_i64m4_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.mask.nxv4i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m4_t test_vsmul_vx_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, + vint64m4_t op1, int64_t op2, size_t vl) { + return vsmul(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vsmul_vv_i64m8_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.mask.nxv8i64.nxv8i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m8_t test_vsmul_vv_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, + vint64m8_t op1, vint64m8_t op2, size_t vl) { + return vsmul(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vsmul_vx_i64m8_m( 
+// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.mask.nxv8i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m8_t test_vsmul_vx_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, + vint64m8_t op1, int64_t op2, size_t vl) { + return vsmul(mask, maskedoff, op1, op2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsmul.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsmul.c --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsmul.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsmul.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include @@ -328,78 +328,6 @@ return vsmul(op1, op2, vl); } -// CHECK-RV64-LABEL: @test_vsmul_vv_i64m1( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.nxv1i64.nxv1i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m1_t test_vsmul_vv_i64m1(vint64m1_t op1, vint64m1_t op2, size_t vl) { - return vsmul(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vsmul_vx_i64m1( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.nxv1i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m1_t test_vsmul_vx_i64m1(vint64m1_t op1, int64_t op2, size_t vl) { - return vsmul(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vsmul_vv_i64m2( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.nxv2i64.nxv2i64.i64( 
[[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m2_t test_vsmul_vv_i64m2(vint64m2_t op1, vint64m2_t op2, size_t vl) { - return vsmul(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vsmul_vx_i64m2( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.nxv2i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m2_t test_vsmul_vx_i64m2(vint64m2_t op1, int64_t op2, size_t vl) { - return vsmul(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vsmul_vv_i64m4( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.nxv4i64.nxv4i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m4_t test_vsmul_vv_i64m4(vint64m4_t op1, vint64m4_t op2, size_t vl) { - return vsmul(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vsmul_vx_i64m4( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.nxv4i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m4_t test_vsmul_vx_i64m4(vint64m4_t op1, int64_t op2, size_t vl) { - return vsmul(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vsmul_vv_i64m8( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.nxv8i64.nxv8i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m8_t test_vsmul_vv_i64m8(vint64m8_t op1, vint64m8_t op2, size_t vl) { - return vsmul(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vsmul_vx_i64m8( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.nxv8i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m8_t test_vsmul_vx_i64m8(vint64m8_t op1, int64_t op2, size_t vl) { - return vsmul(op1, op2, vl); -} - // CHECK-RV64-LABEL: @test_vsmul_vv_i8mf8_m( // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: 
[[TMP0:%.*]] = call @llvm.riscv.vsmul.mask.nxv1i8.nxv1i8.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) @@ -762,83 +690,3 @@ vint32m8_t op1, int32_t op2, size_t vl) { return vsmul(mask, maskedoff, op1, op2, vl); } - -// CHECK-RV64-LABEL: @test_vsmul_vv_i64m1_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.mask.nxv1i64.nxv1i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m1_t test_vsmul_vv_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, - vint64m1_t op1, vint64m1_t op2, size_t vl) { - return vsmul(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vsmul_vx_i64m1_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.mask.nxv1i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m1_t test_vsmul_vx_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, - vint64m1_t op1, int64_t op2, size_t vl) { - return vsmul(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vsmul_vv_i64m2_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.mask.nxv2i64.nxv2i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m2_t test_vsmul_vv_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, - vint64m2_t op1, vint64m2_t op2, size_t vl) { - return vsmul(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vsmul_vx_i64m2_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.mask.nxv2i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m2_t test_vsmul_vx_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, - vint64m2_t op1, int64_t op2, size_t vl) { - 
return vsmul(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vsmul_vv_i64m4_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.mask.nxv4i64.nxv4i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m4_t test_vsmul_vv_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, - vint64m4_t op1, vint64m4_t op2, size_t vl) { - return vsmul(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vsmul_vx_i64m4_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.mask.nxv4i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m4_t test_vsmul_vx_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, - vint64m4_t op1, int64_t op2, size_t vl) { - return vsmul(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vsmul_vv_i64m8_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.mask.nxv8i64.nxv8i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m8_t test_vsmul_vv_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, - vint64m8_t op1, vint64m8_t op2, size_t vl) { - return vsmul(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vsmul_vx_i64m8_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.mask.nxv8i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m8_t test_vsmul_vx_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, - vint64m8_t op1, int64_t op2, size_t vl) { - return vsmul(mask, maskedoff, op1, op2, vl); -} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmul-eew64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmul-eew64.c new file mode 100644 --- /dev/null +++ 
b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmul-eew64.c @@ -0,0 +1,440 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// NOTE: This test file contains eew=64 of vmulh, vmulhu, vmulhsu. +// NOTE: The purpose of separating these 3 instructions from vmul.c is that +// eew=64 versions only enable when V extension is specified. (Not for zve) + +#include + +// CHECK-RV64-LABEL: @test_vmulh_vv_i64m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.nxv1i64.nxv1i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m1_t test_vmulh_vv_i64m1(vint64m1_t op1, vint64m1_t op2, size_t vl) { + return vmulh_vv_i64m1(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulh_vx_i64m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.nxv1i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m1_t test_vmulh_vx_i64m1(vint64m1_t op1, int64_t op2, size_t vl) { + return vmulh_vx_i64m1(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulh_vv_i64m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.nxv2i64.nxv2i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m2_t test_vmulh_vv_i64m2(vint64m2_t op1, vint64m2_t op2, size_t vl) { + return vmulh_vv_i64m2(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulh_vx_i64m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.nxv2i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m2_t test_vmulh_vx_i64m2(vint64m2_t op1, int64_t op2, size_t vl) { + return vmulh_vx_i64m2(op1, op2, vl); +} + +// 
CHECK-RV64-LABEL: @test_vmulh_vv_i64m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.nxv4i64.nxv4i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m4_t test_vmulh_vv_i64m4(vint64m4_t op1, vint64m4_t op2, size_t vl) { + return vmulh_vv_i64m4(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulh_vx_i64m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.nxv4i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m4_t test_vmulh_vx_i64m4(vint64m4_t op1, int64_t op2, size_t vl) { + return vmulh_vx_i64m4(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulh_vv_i64m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.nxv8i64.nxv8i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m8_t test_vmulh_vv_i64m8(vint64m8_t op1, vint64m8_t op2, size_t vl) { + return vmulh_vv_i64m8(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulh_vx_i64m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.nxv8i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m8_t test_vmulh_vx_i64m8(vint64m8_t op1, int64_t op2, size_t vl) { + return vmulh_vx_i64m8(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.nxv1i64.nxv1i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m1_t test_vmulhu_vv_u64m1(vuint64m1_t op1, vuint64m1_t op2, size_t vl) { + return vmulhu_vv_u64m1(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.nxv1i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// 
+vuint64m1_t test_vmulhu_vx_u64m1(vuint64m1_t op1, uint64_t op2, size_t vl) { + return vmulhu_vx_u64m1(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.nxv2i64.nxv2i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m2_t test_vmulhu_vv_u64m2(vuint64m2_t op1, vuint64m2_t op2, size_t vl) { + return vmulhu_vv_u64m2(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.nxv2i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m2_t test_vmulhu_vx_u64m2(vuint64m2_t op1, uint64_t op2, size_t vl) { + return vmulhu_vx_u64m2(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.nxv4i64.nxv4i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m4_t test_vmulhu_vv_u64m4(vuint64m4_t op1, vuint64m4_t op2, size_t vl) { + return vmulhu_vv_u64m4(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.nxv4i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m4_t test_vmulhu_vx_u64m4(vuint64m4_t op1, uint64_t op2, size_t vl) { + return vmulhu_vx_u64m4(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.nxv8i64.nxv8i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m8_t test_vmulhu_vv_u64m8(vuint64m8_t op1, vuint64m8_t op2, size_t vl) { + return vmulhu_vv_u64m8(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m8( +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.nxv8i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m8_t test_vmulhu_vx_u64m8(vuint64m8_t op1, uint64_t op2, size_t vl) { + return vmulhu_vx_u64m8(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.nxv1i64.nxv1i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m1_t test_vmulhsu_vv_i64m1(vint64m1_t op1, vuint64m1_t op2, size_t vl) { + return vmulhsu_vv_i64m1(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.nxv1i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m1_t test_vmulhsu_vx_i64m1(vint64m1_t op1, uint64_t op2, size_t vl) { + return vmulhsu_vx_i64m1(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.nxv2i64.nxv2i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m2_t test_vmulhsu_vv_i64m2(vint64m2_t op1, vuint64m2_t op2, size_t vl) { + return vmulhsu_vv_i64m2(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.nxv2i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m2_t test_vmulhsu_vx_i64m2(vint64m2_t op1, uint64_t op2, size_t vl) { + return vmulhsu_vx_i64m2(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.nxv4i64.nxv4i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m4_t 
test_vmulhsu_vv_i64m4(vint64m4_t op1, vuint64m4_t op2, size_t vl) { + return vmulhsu_vv_i64m4(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.nxv4i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m4_t test_vmulhsu_vx_i64m4(vint64m4_t op1, uint64_t op2, size_t vl) { + return vmulhsu_vx_i64m4(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.nxv8i64.nxv8i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m8_t test_vmulhsu_vv_i64m8(vint64m8_t op1, vuint64m8_t op2, size_t vl) { + return vmulhsu_vv_i64m8(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.nxv8i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m8_t test_vmulhsu_vx_i64m8(vint64m8_t op1, uint64_t op2, size_t vl) { + return vmulhsu_vx_i64m8(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulh_vv_i64m1_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.mask.nxv1i64.nxv1i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m1_t test_vmulh_vv_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, vint64m1_t op2, size_t vl) { + return vmulh_vv_i64m1_m(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulh_vx_i64m1_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.mask.nxv1i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m1_t test_vmulh_vx_i64m1_m(vbool64_t mask, vint64m1_t 
maskedoff, vint64m1_t op1, int64_t op2, size_t vl) { + return vmulh_vx_i64m1_m(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulh_vv_i64m2_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.mask.nxv2i64.nxv2i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m2_t test_vmulh_vv_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, vint64m2_t op2, size_t vl) { + return vmulh_vv_i64m2_m(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulh_vx_i64m2_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.mask.nxv2i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m2_t test_vmulh_vx_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, int64_t op2, size_t vl) { + return vmulh_vx_i64m2_m(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulh_vv_i64m4_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.mask.nxv4i64.nxv4i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m4_t test_vmulh_vv_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, vint64m4_t op2, size_t vl) { + return vmulh_vv_i64m4_m(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulh_vx_i64m4_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.mask.nxv4i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m4_t test_vmulh_vx_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, int64_t op2, size_t vl) { + return vmulh_vx_i64m4_m(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulh_vv_i64m8_m( +// 
CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.mask.nxv8i64.nxv8i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m8_t test_vmulh_vv_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, vint64m8_t op2, size_t vl) { + return vmulh_vv_i64m8_m(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulh_vx_i64m8_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.mask.nxv8i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m8_t test_vmulh_vx_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, int64_t op2, size_t vl) { + return vmulh_vx_i64m8_m(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m1_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.mask.nxv1i64.nxv1i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m1_t test_vmulhu_vv_u64m1_m(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t op1, vuint64m1_t op2, size_t vl) { + return vmulhu_vv_u64m1_m(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m1_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.mask.nxv1i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m1_t test_vmulhu_vx_u64m1_m(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t op1, uint64_t op2, size_t vl) { + return vmulhu_vx_u64m1_m(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m2_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.mask.nxv2i64.nxv2i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], 
[[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m2_t test_vmulhu_vv_u64m2_m(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t op1, vuint64m2_t op2, size_t vl) { + return vmulhu_vv_u64m2_m(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m2_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.mask.nxv2i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m2_t test_vmulhu_vx_u64m2_m(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t op1, uint64_t op2, size_t vl) { + return vmulhu_vx_u64m2_m(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m4_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.mask.nxv4i64.nxv4i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m4_t test_vmulhu_vv_u64m4_m(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t op1, vuint64m4_t op2, size_t vl) { + return vmulhu_vv_u64m4_m(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m4_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.mask.nxv4i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m4_t test_vmulhu_vx_u64m4_m(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t op1, uint64_t op2, size_t vl) { + return vmulhu_vx_u64m4_m(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m8_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.mask.nxv8i64.nxv8i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m8_t test_vmulhu_vv_u64m8_m(vbool8_t mask, vuint64m8_t 
maskedoff, vuint64m8_t op1, vuint64m8_t op2, size_t vl) { + return vmulhu_vv_u64m8_m(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m8_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.mask.nxv8i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m8_t test_vmulhu_vx_u64m8_m(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t op1, uint64_t op2, size_t vl) { + return vmulhu_vx_u64m8_m(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m1_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.mask.nxv1i64.nxv1i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m1_t test_vmulhsu_vv_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, vuint64m1_t op2, size_t vl) { + return vmulhsu_vv_i64m1_m(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m1_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.mask.nxv1i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m1_t test_vmulhsu_vx_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, uint64_t op2, size_t vl) { + return vmulhsu_vx_i64m1_m(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m2_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.mask.nxv2i64.nxv2i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m2_t test_vmulhsu_vv_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, vuint64m2_t op2, size_t vl) { + return vmulhsu_vv_i64m2_m(mask, maskedoff, op1, op2, vl); +} + +// 
CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m2_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.mask.nxv2i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m2_t test_vmulhsu_vx_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, uint64_t op2, size_t vl) { + return vmulhsu_vx_i64m2_m(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m4_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.mask.nxv4i64.nxv4i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m4_t test_vmulhsu_vv_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, vuint64m4_t op2, size_t vl) { + return vmulhsu_vv_i64m4_m(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m4_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.mask.nxv4i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m4_t test_vmulhsu_vx_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, uint64_t op2, size_t vl) { + return vmulhsu_vx_i64m4_m(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m8_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.mask.nxv8i64.nxv8i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m8_t test_vmulhsu_vv_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, vuint64m8_t op2, size_t vl) { + return vmulhsu_vv_i64m8_m(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m8_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
@llvm.riscv.vmulhsu.mask.nxv8i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m8_t test_vmulhsu_vx_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, uint64_t op2, size_t vl) { + return vmulhsu_vx_i64m8_m(mask, maskedoff, op1, op2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmul.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmul.c --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmul.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmul.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include @@ -1120,78 +1120,6 @@ return vmulh_vx_i32m8(op1, op2, vl); } -// CHECK-RV64-LABEL: @test_vmulh_vv_i64m1( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.nxv1i64.nxv1i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m1_t test_vmulh_vv_i64m1(vint64m1_t op1, vint64m1_t op2, size_t vl) { - return vmulh_vv_i64m1(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulh_vx_i64m1( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.nxv1i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m1_t test_vmulh_vx_i64m1(vint64m1_t op1, int64_t op2, size_t vl) { - return vmulh_vx_i64m1(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulh_vv_i64m2( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.nxv2i64.nxv2i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret 
[[TMP0]] -// -vint64m2_t test_vmulh_vv_i64m2(vint64m2_t op1, vint64m2_t op2, size_t vl) { - return vmulh_vv_i64m2(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulh_vx_i64m2( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.nxv2i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m2_t test_vmulh_vx_i64m2(vint64m2_t op1, int64_t op2, size_t vl) { - return vmulh_vx_i64m2(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulh_vv_i64m4( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.nxv4i64.nxv4i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m4_t test_vmulh_vv_i64m4(vint64m4_t op1, vint64m4_t op2, size_t vl) { - return vmulh_vv_i64m4(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulh_vx_i64m4( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.nxv4i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m4_t test_vmulh_vx_i64m4(vint64m4_t op1, int64_t op2, size_t vl) { - return vmulh_vx_i64m4(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulh_vv_i64m8( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.nxv8i64.nxv8i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m8_t test_vmulh_vv_i64m8(vint64m8_t op1, vint64m8_t op2, size_t vl) { - return vmulh_vv_i64m8(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulh_vx_i64m8( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.nxv8i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m8_t test_vmulh_vx_i64m8(vint64m8_t op1, int64_t op2, size_t vl) { - return vmulh_vx_i64m8(op1, op2, vl); -} - // CHECK-RV64-LABEL: @test_vmulhu_vv_u8mf8( // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: [[TMP0:%.*]] = 
call @llvm.riscv.vmulhu.nxv1i8.nxv1i8.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) @@ -1516,78 +1444,6 @@ return vmulhu_vx_u32m8(op1, op2, vl); } -// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m1( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.nxv1i64.nxv1i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vuint64m1_t test_vmulhu_vv_u64m1(vuint64m1_t op1, vuint64m1_t op2, size_t vl) { - return vmulhu_vv_u64m1(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m1( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.nxv1i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vuint64m1_t test_vmulhu_vx_u64m1(vuint64m1_t op1, uint64_t op2, size_t vl) { - return vmulhu_vx_u64m1(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m2( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.nxv2i64.nxv2i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vuint64m2_t test_vmulhu_vv_u64m2(vuint64m2_t op1, vuint64m2_t op2, size_t vl) { - return vmulhu_vv_u64m2(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m2( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.nxv2i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vuint64m2_t test_vmulhu_vx_u64m2(vuint64m2_t op1, uint64_t op2, size_t vl) { - return vmulhu_vx_u64m2(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m4( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.nxv4i64.nxv4i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vuint64m4_t test_vmulhu_vv_u64m4(vuint64m4_t op1, vuint64m4_t op2, size_t vl) { - return vmulhu_vv_u64m4(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m4( -// 
CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.nxv4i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vuint64m4_t test_vmulhu_vx_u64m4(vuint64m4_t op1, uint64_t op2, size_t vl) { - return vmulhu_vx_u64m4(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m8( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.nxv8i64.nxv8i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vuint64m8_t test_vmulhu_vv_u64m8(vuint64m8_t op1, vuint64m8_t op2, size_t vl) { - return vmulhu_vv_u64m8(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m8( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.nxv8i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vuint64m8_t test_vmulhu_vx_u64m8(vuint64m8_t op1, uint64_t op2, size_t vl) { - return vmulhu_vx_u64m8(op1, op2, vl); -} - // CHECK-RV64-LABEL: @test_vmulhsu_vv_i8mf8( // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.nxv1i8.nxv1i8.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) @@ -1912,78 +1768,6 @@ return vmulhsu_vx_i32m8(op1, op2, vl); } -// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m1( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.nxv1i64.nxv1i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m1_t test_vmulhsu_vv_i64m1(vint64m1_t op1, vuint64m1_t op2, size_t vl) { - return vmulhsu_vv_i64m1(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m1( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.nxv1i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m1_t test_vmulhsu_vx_i64m1(vint64m1_t op1, uint64_t op2, size_t vl) { - return vmulhsu_vx_i64m1(op1, op2, vl); 
-} - -// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m2( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.nxv2i64.nxv2i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m2_t test_vmulhsu_vv_i64m2(vint64m2_t op1, vuint64m2_t op2, size_t vl) { - return vmulhsu_vv_i64m2(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m2( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.nxv2i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m2_t test_vmulhsu_vx_i64m2(vint64m2_t op1, uint64_t op2, size_t vl) { - return vmulhsu_vx_i64m2(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m4( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.nxv4i64.nxv4i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m4_t test_vmulhsu_vv_i64m4(vint64m4_t op1, vuint64m4_t op2, size_t vl) { - return vmulhsu_vv_i64m4(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m4( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.nxv4i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m4_t test_vmulhsu_vx_i64m4(vint64m4_t op1, uint64_t op2, size_t vl) { - return vmulhsu_vx_i64m4(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m8( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.nxv8i64.nxv8i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m8_t test_vmulhsu_vv_i64m8(vint64m8_t op1, vuint64m8_t op2, size_t vl) { - return vmulhsu_vv_i64m8(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m8( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.nxv8i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 
[[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m8_t test_vmulhsu_vx_i64m8(vint64m8_t op1, uint64_t op2, size_t vl) { - return vmulhsu_vx_i64m8(op1, op2, vl); -} - // CHECK-RV64-LABEL: @test_vmul_vv_i8mf8_m( // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmul.mask.nxv1i8.nxv1i8.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) @@ -3100,78 +2884,6 @@ return vmulh_vx_i32m8_m(mask, maskedoff, op1, op2, vl); } -// CHECK-RV64-LABEL: @test_vmulh_vv_i64m1_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.mask.nxv1i64.nxv1i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m1_t test_vmulh_vv_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, vint64m1_t op2, size_t vl) { - return vmulh_vv_i64m1_m(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulh_vx_i64m1_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.mask.nxv1i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m1_t test_vmulh_vx_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, int64_t op2, size_t vl) { - return vmulh_vx_i64m1_m(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulh_vv_i64m2_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.mask.nxv2i64.nxv2i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m2_t test_vmulh_vv_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, vint64m2_t op2, size_t vl) { - return vmulh_vv_i64m2_m(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulh_vx_i64m2_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
@llvm.riscv.vmulh.mask.nxv2i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m2_t test_vmulh_vx_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, int64_t op2, size_t vl) { - return vmulh_vx_i64m2_m(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulh_vv_i64m4_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.mask.nxv4i64.nxv4i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m4_t test_vmulh_vv_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, vint64m4_t op2, size_t vl) { - return vmulh_vv_i64m4_m(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulh_vx_i64m4_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.mask.nxv4i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m4_t test_vmulh_vx_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, int64_t op2, size_t vl) { - return vmulh_vx_i64m4_m(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulh_vv_i64m8_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.mask.nxv8i64.nxv8i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m8_t test_vmulh_vv_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, vint64m8_t op2, size_t vl) { - return vmulh_vv_i64m8_m(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulh_vx_i64m8_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulh.mask.nxv8i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// 
-vint64m8_t test_vmulh_vx_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, int64_t op2, size_t vl) { - return vmulh_vx_i64m8_m(mask, maskedoff, op1, op2, vl); -} - // CHECK-RV64-LABEL: @test_vmulhu_vv_u8mf8_m( // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.mask.nxv1i8.nxv1i8.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) @@ -3496,78 +3208,6 @@ return vmulhu_vx_u32m8_m(mask, maskedoff, op1, op2, vl); } -// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m1_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.mask.nxv1i64.nxv1i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vuint64m1_t test_vmulhu_vv_u64m1_m(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t op1, vuint64m1_t op2, size_t vl) { - return vmulhu_vv_u64m1_m(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m1_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.mask.nxv1i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vuint64m1_t test_vmulhu_vx_u64m1_m(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t op1, uint64_t op2, size_t vl) { - return vmulhu_vx_u64m1_m(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m2_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.mask.nxv2i64.nxv2i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vuint64m2_t test_vmulhu_vv_u64m2_m(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t op1, vuint64m2_t op2, size_t vl) { - return vmulhu_vv_u64m2_m(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m2_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: 
[[TMP0:%.*]] = call @llvm.riscv.vmulhu.mask.nxv2i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vuint64m2_t test_vmulhu_vx_u64m2_m(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t op1, uint64_t op2, size_t vl) { - return vmulhu_vx_u64m2_m(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m4_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.mask.nxv4i64.nxv4i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vuint64m4_t test_vmulhu_vv_u64m4_m(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t op1, vuint64m4_t op2, size_t vl) { - return vmulhu_vv_u64m4_m(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m4_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.mask.nxv4i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vuint64m4_t test_vmulhu_vx_u64m4_m(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t op1, uint64_t op2, size_t vl) { - return vmulhu_vx_u64m4_m(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m8_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.mask.nxv8i64.nxv8i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vuint64m8_t test_vmulhu_vv_u64m8_m(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t op1, vuint64m8_t op2, size_t vl) { - return vmulhu_vv_u64m8_m(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m8_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhu.mask.nxv8i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 
[[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vuint64m8_t test_vmulhu_vx_u64m8_m(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t op1, uint64_t op2, size_t vl) { - return vmulhu_vx_u64m8_m(mask, maskedoff, op1, op2, vl); -} - // CHECK-RV64-LABEL: @test_vmulhsu_vv_i8mf8_m( // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.mask.nxv1i8.nxv1i8.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) @@ -3891,75 +3531,3 @@ vint32m8_t test_vmulhsu_vx_i32m8_m(vbool4_t mask, vint32m8_t maskedoff, vint32m8_t op1, uint32_t op2, size_t vl) { return vmulhsu_vx_i32m8_m(mask, maskedoff, op1, op2, vl); } - -// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m1_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.mask.nxv1i64.nxv1i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m1_t test_vmulhsu_vv_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, vuint64m1_t op2, size_t vl) { - return vmulhsu_vv_i64m1_m(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m1_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.mask.nxv1i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m1_t test_vmulhsu_vx_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, uint64_t op2, size_t vl) { - return vmulhsu_vx_i64m1_m(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m2_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.mask.nxv2i64.nxv2i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m2_t test_vmulhsu_vv_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, vuint64m2_t 
op2, size_t vl) { - return vmulhsu_vv_i64m2_m(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m2_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.mask.nxv2i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m2_t test_vmulhsu_vx_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, uint64_t op2, size_t vl) { - return vmulhsu_vx_i64m2_m(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m4_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.mask.nxv4i64.nxv4i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m4_t test_vmulhsu_vv_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, vuint64m4_t op2, size_t vl) { - return vmulhsu_vv_i64m4_m(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m4_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.mask.nxv4i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m4_t test_vmulhsu_vx_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, uint64_t op2, size_t vl) { - return vmulhsu_vx_i64m4_m(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m8_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.mask.nxv8i64.nxv8i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m8_t test_vmulhsu_vv_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, vuint64m8_t op2, size_t vl) { - return vmulhsu_vv_i64m8_m(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m8_m( -// 
CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmulhsu.mask.nxv8i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m8_t test_vmulhsu_vx_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, uint64_t op2, size_t vl) { - return vmulhsu_vx_i64m8_m(mask, maskedoff, op1, op2, vl); -} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsmul-eew64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsmul-eew64.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsmul-eew64.c @@ -0,0 +1,159 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// NOTE: The purpose of separating these 3 instructions from vsmul.c is that +// eew=64 versions only enable when V extension is specified. 
(Not for zve) + +#include + +// CHECK-RV64-LABEL: @test_vsmul_vv_i64m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.nxv1i64.nxv1i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m1_t test_vsmul_vv_i64m1(vint64m1_t op1, vint64m1_t op2, size_t vl) { + return vsmul_vv_i64m1(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vsmul_vx_i64m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.nxv1i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m1_t test_vsmul_vx_i64m1(vint64m1_t op1, int64_t op2, size_t vl) { + return vsmul_vx_i64m1(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vsmul_vv_i64m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.nxv2i64.nxv2i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m2_t test_vsmul_vv_i64m2(vint64m2_t op1, vint64m2_t op2, size_t vl) { + return vsmul_vv_i64m2(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vsmul_vx_i64m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.nxv2i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m2_t test_vsmul_vx_i64m2(vint64m2_t op1, int64_t op2, size_t vl) { + return vsmul_vx_i64m2(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vsmul_vv_i64m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.nxv4i64.nxv4i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m4_t test_vsmul_vv_i64m4(vint64m4_t op1, vint64m4_t op2, size_t vl) { + return vsmul_vv_i64m4(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vsmul_vx_i64m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.nxv4i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) +// 
CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m4_t test_vsmul_vx_i64m4(vint64m4_t op1, int64_t op2, size_t vl) { + return vsmul_vx_i64m4(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vsmul_vv_i64m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.nxv8i64.nxv8i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m8_t test_vsmul_vv_i64m8(vint64m8_t op1, vint64m8_t op2, size_t vl) { + return vsmul_vv_i64m8(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vsmul_vx_i64m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.nxv8i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m8_t test_vsmul_vx_i64m8(vint64m8_t op1, int64_t op2, size_t vl) { + return vsmul_vx_i64m8(op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vsmul_vv_i64m1_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.mask.nxv1i64.nxv1i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m1_t test_vsmul_vv_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, + vint64m1_t op1, vint64m1_t op2, size_t vl) { + return vsmul_vv_i64m1_m(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vsmul_vx_i64m1_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.mask.nxv1i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m1_t test_vsmul_vx_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, + vint64m1_t op1, int64_t op2, size_t vl) { + return vsmul_vx_i64m1_m(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vsmul_vv_i64m2_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.mask.nxv2i64.nxv2i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 
[[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m2_t test_vsmul_vv_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, + vint64m2_t op1, vint64m2_t op2, size_t vl) { + return vsmul_vv_i64m2_m(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vsmul_vx_i64m2_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.mask.nxv2i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m2_t test_vsmul_vx_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, + vint64m2_t op1, int64_t op2, size_t vl) { + return vsmul_vx_i64m2_m(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vsmul_vv_i64m4_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.mask.nxv4i64.nxv4i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m4_t test_vsmul_vv_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, + vint64m4_t op1, vint64m4_t op2, size_t vl) { + return vsmul_vv_i64m4_m(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vsmul_vx_i64m4_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.mask.nxv4i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m4_t test_vsmul_vx_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, + vint64m4_t op1, int64_t op2, size_t vl) { + return vsmul_vx_i64m4_m(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vsmul_vv_i64m8_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.mask.nxv8i64.nxv8i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m8_t test_vsmul_vv_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, + vint64m8_t op1, vint64m8_t op2, 
size_t vl) { + return vsmul_vv_i64m8_m(mask, maskedoff, op1, op2, vl); +} + +// CHECK-RV64-LABEL: @test_vsmul_vx_i64m8_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.mask.nxv8i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m8_t test_vsmul_vx_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, + vint64m8_t op1, int64_t op2, size_t vl) { + return vsmul_vx_i64m8_m(mask, maskedoff, op1, op2, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsmul.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsmul.c --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsmul.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsmul.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include @@ -328,78 +328,6 @@ return vsmul_vx_i32m8(op1, op2, vl); } -// CHECK-RV64-LABEL: @test_vsmul_vv_i64m1( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.nxv1i64.nxv1i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m1_t test_vsmul_vv_i64m1(vint64m1_t op1, vint64m1_t op2, size_t vl) { - return vsmul_vv_i64m1(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vsmul_vx_i64m1( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.nxv1i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m1_t test_vsmul_vx_i64m1(vint64m1_t op1, int64_t op2, size_t vl) { - return vsmul_vx_i64m1(op1, op2, vl); -} - -// CHECK-RV64-LABEL: 
@test_vsmul_vv_i64m2( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.nxv2i64.nxv2i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m2_t test_vsmul_vv_i64m2(vint64m2_t op1, vint64m2_t op2, size_t vl) { - return vsmul_vv_i64m2(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vsmul_vx_i64m2( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.nxv2i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m2_t test_vsmul_vx_i64m2(vint64m2_t op1, int64_t op2, size_t vl) { - return vsmul_vx_i64m2(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vsmul_vv_i64m4( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.nxv4i64.nxv4i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m4_t test_vsmul_vv_i64m4(vint64m4_t op1, vint64m4_t op2, size_t vl) { - return vsmul_vv_i64m4(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vsmul_vx_i64m4( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.nxv4i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m4_t test_vsmul_vx_i64m4(vint64m4_t op1, int64_t op2, size_t vl) { - return vsmul_vx_i64m4(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vsmul_vv_i64m8( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.nxv8i64.nxv8i64.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m8_t test_vsmul_vv_i64m8(vint64m8_t op1, vint64m8_t op2, size_t vl) { - return vsmul_vv_i64m8(op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vsmul_vx_i64m8( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.nxv8i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m8_t 
test_vsmul_vx_i64m8(vint64m8_t op1, int64_t op2, size_t vl) { - return vsmul_vx_i64m8(op1, op2, vl); -} - // CHECK-RV64-LABEL: @test_vsmul_vv_i8mf8_m( // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.mask.nxv1i8.nxv1i8.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) @@ -762,83 +690,3 @@ vint32m8_t op1, int32_t op2, size_t vl) { return vsmul_vx_i32m8_m(mask, maskedoff, op1, op2, vl); } - -// CHECK-RV64-LABEL: @test_vsmul_vv_i64m1_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.mask.nxv1i64.nxv1i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m1_t test_vsmul_vv_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, - vint64m1_t op1, vint64m1_t op2, size_t vl) { - return vsmul_vv_i64m1_m(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vsmul_vx_i64m1_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.mask.nxv1i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m1_t test_vsmul_vx_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, - vint64m1_t op1, int64_t op2, size_t vl) { - return vsmul_vx_i64m1_m(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vsmul_vv_i64m2_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.mask.nxv2i64.nxv2i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m2_t test_vsmul_vv_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, - vint64m2_t op1, vint64m2_t op2, size_t vl) { - return vsmul_vv_i64m2_m(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vsmul_vx_i64m2_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.mask.nxv2i64.i64.i64( 
[[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m2_t test_vsmul_vx_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, - vint64m2_t op1, int64_t op2, size_t vl) { - return vsmul_vx_i64m2_m(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vsmul_vv_i64m4_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.mask.nxv4i64.nxv4i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m4_t test_vsmul_vv_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, - vint64m4_t op1, vint64m4_t op2, size_t vl) { - return vsmul_vv_i64m4_m(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vsmul_vx_i64m4_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.mask.nxv4i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m4_t test_vsmul_vx_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, - vint64m4_t op1, int64_t op2, size_t vl) { - return vsmul_vx_i64m4_m(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vsmul_vv_i64m8_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.mask.nxv8i64.nxv8i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m8_t test_vsmul_vv_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, - vint64m8_t op1, vint64m8_t op2, size_t vl) { - return vsmul_vv_i64m8_m(mask, maskedoff, op1, op2, vl); -} - -// CHECK-RV64-LABEL: @test_vsmul_vx_i64m8_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vsmul.mask.nxv8i64.i64.i64( [[MASKEDOFF:%.*]], [[OP1:%.*]], i64 [[OP2:%.*]], [[MASK:%.*]], i64 [[VL:%.*]], i64 0) -// CHECK-RV64-NEXT: ret [[TMP0]] -// -vint64m8_t 
test_vsmul_vx_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, - vint64m8_t op1, int64_t op2, size_t vl) { - return vsmul_vx_i64m8_m(mask, maskedoff, op1, op2, vl); -} diff --git a/clang/unittests/Tooling/SourceCodeBuildersTest.cpp b/clang/unittests/Tooling/SourceCodeBuildersTest.cpp --- a/clang/unittests/Tooling/SourceCodeBuildersTest.cpp +++ b/clang/unittests/Tooling/SourceCodeBuildersTest.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "clang/Tooling/Transformer/SourceCodeBuilders.h" +#include "clang/AST/Type.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" #include "clang/Tooling/Tooling.h" @@ -24,8 +25,23 @@ // Create a valid translation unit from a statement. static std::string wrapSnippet(StringRef StatementCode) { - return ("struct S { S(); S(int); int field; };\n" + return ("namespace std {\n" + "template struct unique_ptr {\n" + " T* operator->() const;\n" + " T& operator*() const;\n" + "};\n" + "template struct shared_ptr {\n" + " T* operator->() const;\n" + " T& operator*() const;\n" + "};\n" + "}\n" + "struct A { void super(); };\n" + "struct S : public A { S(); S(int); int Field; };\n" "S operator+(const S &a, const S &b);\n" + "struct Smart {\n" + " S* operator->() const;\n" + " S& operator*() const;\n" + "};\n" "auto test_snippet = []{" + StatementCode + "};") .str(); @@ -51,7 +67,8 @@ // `StatementCode` may contain other statements not described by `Matcher`. 
static llvm::Optional matchStmt(StringRef StatementCode, StatementMatcher Matcher) { - auto AstUnit = buildASTFromCode(wrapSnippet(StatementCode)); + auto AstUnit = buildASTFromCodeWithArgs(wrapSnippet(StatementCode), + {"-Wno-unused-value"}); if (AstUnit == nullptr) { ADD_FAILURE() << "AST construction failed"; return llvm::None; @@ -95,7 +112,7 @@ testPredicate(needParensAfterUnaryOperator, "int(3.0);", false); testPredicate(needParensAfterUnaryOperator, "void f(); f();", false); testPredicate(needParensAfterUnaryOperator, "int a[3]; a[0];", false); - testPredicate(needParensAfterUnaryOperator, "S x; x.field;", false); + testPredicate(needParensAfterUnaryOperator, "S x; x.Field;", false); testPredicate(needParensAfterUnaryOperator, "int x = 1; --x;", false); testPredicate(needParensAfterUnaryOperator, "int x = 1; -x;", false); } @@ -117,7 +134,7 @@ testPredicate(mayEverNeedParens, "int(3.0);", false); testPredicate(mayEverNeedParens, "void f(); f();", false); testPredicate(mayEverNeedParens, "int a[3]; a[0];", false); - testPredicate(mayEverNeedParens, "S x; x.field;", false); + testPredicate(mayEverNeedParens, "S x; x.Field;", false); } TEST(SourceCodeBuildersTest, mayEverNeedParensInImplictConversion) { @@ -126,6 +143,50 @@ testPredicateOnArg(mayEverNeedParens, "void f(S); f(3 + 5);", true); } +TEST(SourceCodeBuildersTest, isKnownPointerLikeTypeUniquePtr) { + std::string Snippet = "std::unique_ptr P; P;"; + auto StmtMatch = + matchStmt(Snippet, declRefExpr(hasType(qualType().bind("ty")))); + ASSERT_TRUE(StmtMatch) << "Snippet: " << Snippet; + EXPECT_TRUE( + isKnownPointerLikeType(*StmtMatch->Result.Nodes.getNodeAs("ty"), + *StmtMatch->Result.Context)) + << "Snippet: " << Snippet; +} + +TEST(SourceCodeBuildersTest, isKnownPointerLikeTypeSharedPtr) { + std::string Snippet = "std::shared_ptr P; P;"; + auto StmtMatch = + matchStmt(Snippet, declRefExpr(hasType(qualType().bind("ty")))); + ASSERT_TRUE(StmtMatch) << "Snippet: " << Snippet; + EXPECT_TRUE( + 
isKnownPointerLikeType(*StmtMatch->Result.Nodes.getNodeAs("ty"), + *StmtMatch->Result.Context)) + << "Snippet: " << Snippet; +} + +TEST(SourceCodeBuildersTest, isKnownPointerLikeTypeUnknownTypeFalse) { + std::string Snippet = "Smart P; P;"; + auto StmtMatch = + matchStmt(Snippet, declRefExpr(hasType(qualType().bind("ty")))); + ASSERT_TRUE(StmtMatch) << "Snippet: " << Snippet; + EXPECT_FALSE( + isKnownPointerLikeType(*StmtMatch->Result.Nodes.getNodeAs("ty"), + *StmtMatch->Result.Context)) + << "Snippet: " << Snippet; +} + +TEST(SourceCodeBuildersTest, isKnownPointerLikeTypeNormalTypeFalse) { + std::string Snippet = "int *P; P;"; + auto StmtMatch = + matchStmt(Snippet, declRefExpr(hasType(qualType().bind("ty")))); + ASSERT_TRUE(StmtMatch) << "Snippet: " << Snippet; + EXPECT_FALSE( + isKnownPointerLikeType(*StmtMatch->Result.Nodes.getNodeAs("ty"), + *StmtMatch->Result.Context)) + << "Snippet: " << Snippet; +} + static void testBuilder( llvm::Optional (*Builder)(const Expr &, const ASTContext &), StringRef Snippet, StringRef Expected) { @@ -136,6 +197,15 @@ ValueIs(std::string(Expected))); } +static void testBuildAccess(StringRef Snippet, StringRef Expected, + PLTClass C = PLTClass::Pointer) { + auto StmtMatch = matchStmt(Snippet, expr().bind("expr")); + ASSERT_TRUE(StmtMatch); + EXPECT_THAT(buildAccess(*StmtMatch->Result.Nodes.getNodeAs("expr"), + *StmtMatch->Result.Context, C), + ValueIs(std::string(Expected))); +} + TEST(SourceCodeBuildersTest, BuildParensUnaryOp) { testBuilder(buildParens, "-4;", "(-4)"); } @@ -245,4 +315,117 @@ TEST(SourceCodeBuildersTest, BuildArrowValueAddressWithParens) { testBuilder(buildArrow, "S x; &(true ? x : x);", "(true ? 
x : x)."); } + +TEST(SourceCodeBuildersTest, BuildAccessValue) { + testBuildAccess("S x; x;", "x."); +} + +TEST(SourceCodeBuildersTest, BuildAccessPointerDereference) { + testBuildAccess("S *x; *x;", "x->"); +} + +TEST(SourceCodeBuildersTest, BuildAccessPointerDereferenceIgnoresParens) { + testBuildAccess("S *x; *(x);", "x->"); +} + +TEST(SourceCodeBuildersTest, BuildAccessValueBinaryOperation) { + testBuildAccess("S x; x + x;", "(x + x)."); +} + +TEST(SourceCodeBuildersTest, BuildAccessPointerDereferenceExprWithParens) { + testBuildAccess("S *x; *(x + 1);", "(x + 1)->"); +} + +TEST(SourceCodeBuildersTest, BuildAccessPointer) { + testBuildAccess("S *x; x;", "x->"); +} + +TEST(SourceCodeBuildersTest, BuildAccessValueAddress) { + testBuildAccess("S x; &x;", "x."); +} + +TEST(SourceCodeBuildersTest, BuildAccessValueAddressIgnoresParens) { + testBuildAccess("S x; &(x);", "x."); +} + +TEST(SourceCodeBuildersTest, BuildAccessPointerBinaryOperation) { + testBuildAccess("S *x; x + 1;", "(x + 1)->"); +} + +TEST(SourceCodeBuildersTest, BuildAccessValueAddressWithParens) { + testBuildAccess("S x; &(true ? x : x);", "(true ? 
x : x)."); +} + +TEST(SourceCodeBuildersTest, BuildAccessSmartPointer) { + testBuildAccess("std::unique_ptr x; x;", "x->"); +} + +TEST(SourceCodeBuildersTest, BuildAccessSmartPointerAsValue) { + testBuildAccess("std::unique_ptr x; x;", "x.", PLTClass::Value); +} + +TEST(SourceCodeBuildersTest, BuildAccessSmartPointerDeref) { + testBuildAccess("std::unique_ptr x; *x;", "x->"); +} + +TEST(SourceCodeBuildersTest, BuildAccessSmartPointerDerefAsValue) { + testBuildAccess("std::unique_ptr x; *x;", "(*x).", PLTClass::Value); +} + +TEST(SourceCodeBuildersTest, BuildAccessSmartPointerMemberCall) { + StringRef Snippet = R"cc( + Smart x; + x->Field; + )cc"; + auto StmtMatch = + matchStmt(Snippet, memberExpr(hasObjectExpression(expr().bind("expr")))); + ASSERT_TRUE(StmtMatch); + EXPECT_THAT(buildAccess(*StmtMatch->Result.Nodes.getNodeAs("expr"), + *StmtMatch->Result.Context), + ValueIs(std::string("x->"))); +} + +TEST(SourceCodeBuildersTest, BuildAccessIgnoreImplicit) { + StringRef Snippet = R"cc( + S x; + A *a; + a = &x; + )cc"; + auto StmtMatch = + matchStmt(Snippet, binaryOperator(isAssignmentOperator(), + hasRHS(expr().bind("expr")))); + ASSERT_TRUE(StmtMatch); + EXPECT_THAT(buildAccess(*StmtMatch->Result.Nodes.getNodeAs("expr"), + *StmtMatch->Result.Context), + ValueIs(std::string("x."))); +} + +TEST(SourceCodeBuildersTest, BuildAccessImplicitThis) { + StringRef Snippet = R"cc( + struct Struct { + void foo() {} + void bar() { + foo(); + } + }; + )cc"; + auto StmtMatch = matchStmt( + Snippet, + cxxMemberCallExpr(onImplicitObjectArgument(cxxThisExpr().bind("expr")))); + ASSERT_TRUE(StmtMatch); + EXPECT_THAT(buildAccess(*StmtMatch->Result.Nodes.getNodeAs("expr"), + *StmtMatch->Result.Context), + ValueIs(std::string())); +} + +TEST(SourceCodeBuildersTest, BuildAccessImplicitThisIgnoreImplicitCasts) { + StringRef Snippet = "struct B : public A { void f() { super(); } };"; + auto StmtMatch = matchStmt( + Snippet, + 
cxxMemberCallExpr(onImplicitObjectArgument(expr().bind("expr")))); + ASSERT_TRUE(StmtMatch); + EXPECT_THAT(buildAccess(*StmtMatch->Result.Nodes.getNodeAs("expr"), + *StmtMatch->Result.Context), + ValueIs(std::string())); +} } // namespace diff --git a/clang/unittests/Tooling/StencilTest.cpp b/clang/unittests/Tooling/StencilTest.cpp --- a/clang/unittests/Tooling/StencilTest.cpp +++ b/clang/unittests/Tooling/StencilTest.cpp @@ -36,10 +36,13 @@ namespace N { class C {}; } namespace { class AnonC {}; } struct S { int Field; }; - struct Smart { - S* operator->() const; - S& operator*() const; + namespace std { + template + struct unique_ptr { + T* operator->() const; + T& operator*() const; }; + } )cc"; return (Preface + ExtraPreface + "auto stencil_test_snippet = []{" + StatementCode + "};") @@ -326,32 +329,15 @@ TEST_F(StencilTest, MaybeDerefSmartPointer) { StringRef Id = "id"; std::string Snippet = R"cc( - Smart x; + std::unique_ptr x; x; )cc"; testExpr(Id, Snippet, maybeDeref(Id), "*x"); } -// Tests that unique_ptr specifically is handled. -TEST_F(StencilTest, MaybeDerefSmartPointerUniquePtr) { - StringRef Id = "id"; - // We deliberately specify `unique_ptr` as empty to verify that it matches - // because of its name, rather than its contents. 
- StringRef ExtraPreface = - "namespace std { template class unique_ptr {}; }\n"; - StringRef Snippet = R"cc( - std::unique_ptr x; - x; - )cc"; - auto StmtMatch = matchStmt(Snippet, expr().bind(Id), ExtraPreface); - ASSERT_TRUE(StmtMatch); - EXPECT_THAT_EXPECTED(maybeDeref(Id)->eval(StmtMatch->Result), - HasValue(std::string("*x"))); -} - TEST_F(StencilTest, MaybeDerefSmartPointerFromMemberExpr) { StringRef Id = "id"; - std::string Snippet = "Smart x; x->Field;"; + std::string Snippet = "std::unique_ptr x; x->Field;"; auto StmtMatch = matchStmt(Snippet, memberExpr(hasObjectExpression(expr().bind(Id)))); ASSERT_TRUE(StmtMatch); @@ -381,12 +367,12 @@ TEST_F(StencilTest, MaybeAddressOfSmartPointer) { StringRef Id = "id"; - testExpr(Id, "Smart x; x;", maybeAddressOf(Id), "x"); + testExpr(Id, "std::unique_ptr x; x;", maybeAddressOf(Id), "x"); } TEST_F(StencilTest, MaybeAddressOfSmartPointerFromMemberCall) { StringRef Id = "id"; - std::string Snippet = "Smart x; x->Field;"; + std::string Snippet = "std::unique_ptr x; x->Field;"; auto StmtMatch = matchStmt(Snippet, memberExpr(hasObjectExpression(expr().bind(Id)))); ASSERT_TRUE(StmtMatch); @@ -396,7 +382,7 @@ TEST_F(StencilTest, MaybeAddressOfSmartPointerDerefNoCancel) { StringRef Id = "id"; - testExpr(Id, "Smart x; *x;", maybeAddressOf(Id), "&*x"); + testExpr(Id, "std::unique_ptr x; *x;", maybeAddressOf(Id), "&*x"); } TEST_F(StencilTest, AccessOpValue) { @@ -446,7 +432,7 @@ TEST_F(StencilTest, AccessOpSmartPointer) { StringRef Snippet = R"cc( - Smart x; + std::unique_ptr x; x; )cc"; StringRef Id = "id"; @@ -455,7 +441,7 @@ TEST_F(StencilTest, AccessOpSmartPointerDereference) { StringRef Snippet = R"cc( - Smart x; + std::unique_ptr x; *x; )cc"; StringRef Id = "id"; @@ -464,7 +450,7 @@ TEST_F(StencilTest, AccessOpSmartPointerMemberCall) { StringRef Snippet = R"cc( - Smart x; + std::unique_ptr x; x->Field; )cc"; StringRef Id = "id"; diff --git a/clang/utils/TableGen/RISCVVEmitter.cpp b/clang/utils/TableGen/RISCVVEmitter.cpp 
--- a/clang/utils/TableGen/RISCVVEmitter.cpp +++ b/clang/utils/TableGen/RISCVVEmitter.cpp @@ -141,11 +141,12 @@ enum RISCVPredefinedMacro : RISCVPredefinedMacroT { Basic = 0, - Zfh = 1 << 1, - RV64 = 1 << 2, - VectorMaxELen64 = 1 << 3, - VectorMaxELenFp32 = 1 << 4, - VectorMaxELenFp64 = 1 << 5, + V = 1 << 1, + Zfh = 1 << 2, + RV64 = 1 << 3, + VectorMaxELen64 = 1 << 4, + VectorMaxELenFp32 = 1 << 5, + VectorMaxELenFp64 = 1 << 6, }; // TODO refactor RVVIntrinsic class design after support all intrinsic @@ -808,6 +809,11 @@ for (auto Feature : RequiredFeatures) { if (Feature == "RV64") RISCVPredefinedMacros |= RISCVPredefinedMacro::RV64; + // Note: Full multiply instruction (mulh, mulhu, mulhsu, smul) for EEW=64 + // require V. + if (Feature == "FullMultiply" && + (RISCVPredefinedMacros & RISCVPredefinedMacro::VectorMaxELen64)) + RISCVPredefinedMacros |= RISCVPredefinedMacro::V; } // Init OutputType and InputTypes @@ -1314,6 +1320,8 @@ return false; OS << "#if "; ListSeparator LS(" && "); + if (PredefinedMacros & RISCVPredefinedMacro::V) + OS << LS << "defined(__riscv_v)"; if (PredefinedMacros & RISCVPredefinedMacro::Zfh) OS << LS << "defined(__riscv_zfh)"; if (PredefinedMacros & RISCVPredefinedMacro::RV64) diff --git a/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c b/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c --- a/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c +++ b/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c @@ -1,12 +1,18 @@ // REQUIRES: zlib // Value profiling is currently not supported in lightweight mode. 
-// RUN: %clang_pgogen -o %t -g -gdwarf-4 -mllvm --debug-info-correlate -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp -// RUN: env LLVM_PROFILE_FILE=%t.proflite %run %t -// RUN: llvm-profdata merge -o %t.profdata --debug-info=%t %t.proflite - // RUN: %clang_pgogen -o %t.normal -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp // RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t.normal // RUN: llvm-profdata merge -o %t.normal.profdata %t.profraw +// RUN: %clang_pgogen -o %t.d4 -g -gdwarf-4 -mllvm --debug-info-correlate -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp +// RUN: env LLVM_PROFILE_FILE=%t.d4.proflite %run %t.d4 +// RUN: llvm-profdata merge -o %t.d4.profdata --debug-info=%t.d4 %t.d4.proflite + +// RUN: diff %t.normal.profdata %t.d4.profdata + +// RUN: %clang_pgogen -o %t -g -mllvm --debug-info-correlate -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp +// RUN: env LLVM_PROFILE_FILE=%t.proflite %run %t +// RUN: llvm-profdata merge -o %t.profdata --debug-info=%t %t.proflite + // RUN: diff %t.normal.profdata %t.profdata diff --git a/compiler-rt/test/profile/Posix/instrprof-get-filename-merge-mode.c b/compiler-rt/test/profile/Posix/instrprof-get-filename-merge-mode.c --- a/compiler-rt/test/profile/Posix/instrprof-get-filename-merge-mode.c +++ b/compiler-rt/test/profile/Posix/instrprof-get-filename-merge-mode.c @@ -1,6 +1,6 @@ // Test __llvm_profile_get_filename when the on-line merging mode is enabled. 
// -// RUN: %clang_pgogen -fPIC -shared -o %t.dso %p/../Inputs/instrprof-get-filename-dso.c +// RUN: %clang_pgogen -fPIC -shared %shared_linker_xopts -o %t.dso %p/../Inputs/instrprof-get-filename-dso.c // RUN: %clang_pgogen -o %t %s %t.dso // RUN: env LLVM_PROFILE_FILE="%t-%m.profraw" %run %t diff --git a/compiler-rt/test/profile/Posix/lit.local.cfg.py b/compiler-rt/test/profile/Posix/lit.local.cfg.py --- a/compiler-rt/test/profile/Posix/lit.local.cfg.py +++ b/compiler-rt/test/profile/Posix/lit.local.cfg.py @@ -7,3 +7,10 @@ if root.host_os in ['Windows']: config.unsupported = True + +# AIX usually usually makes use of an explicit export list when linking a shared +# object, but for the purposes of these tests just export all symbols. +if root.host_os in ['AIX']: + config.substitutions.append(('%shared_linker_xopts', '-Wl,-bexpfull')) +else: + config.substitutions.append(('%shared_linker_xopts', '')) diff --git a/libcxx/docs/Status/FormatIssues.csv b/libcxx/docs/Status/FormatIssues.csv --- a/libcxx/docs/Status/FormatIssues.csv +++ b/libcxx/docs/Status/FormatIssues.csv @@ -1,6 +1,6 @@ Number,Name,Assignee,Patch,Status,First released version `P0645 `_,"Text Formatting",Mark de Wever,,|Complete|,Clang 14 -`P1652 `_,"Printf corner cases in std::format",Mark de Wever,"`D103433 `__, `D114001 `__",|Review|, +`P1652 `_,"Printf corner cases in std::format",Mark de Wever,"`D103433 `__, `D114001 `__",|Complete|,Clang 14 `P1892 `_,"Extended locale-specific presentation specifiers for std::format",Mark de Wever,`D103368 `__,|Complete|,Clang 14 `P1868 `_,"width: clarifying units of width and precision in std::format (Implements the unicode support.)",Mark de Wever,"`D103413 `__ `D103425 `__ `D103670 `__",|Complete|,Clang 14 `P2216 `_,"std::format improvements",Mark de Wever,,|In Progress|, diff --git a/libcxx/docs/Status/FormatPaper.csv b/libcxx/docs/Status/FormatPaper.csv --- a/libcxx/docs/Status/FormatPaper.csv +++ b/libcxx/docs/Status/FormatPaper.csv @@ -6,21 +6,21 @@ 
`[format.context] `_,"Class template basic_format_context",,Mark de Wever,`D103357 `__,|Complete|,Clang 14 `[format.args] `_,"Class template basic_format_args",,Mark de Wever,`D103357 `__,|Complete|,Clang 14 `[format.arg] `_,"Class template basic_format_arg",,Mark de Wever,`D103357 `__,|Complete|,Clang 14 -`[format.arg] `_,"Class template basic_format_arg - handle",,Unassigned,,|Not Started|, -`[format.arg] `_,"Class template basic_format_arg - pointers",,Mark de Wever,,|In Progress|, +`[format.arg] `_,"Class template basic_format_arg - handle",,Mark de Wever,,|Complete|,Clang 14 +`[format.arg] `_,"Class template basic_format_arg - pointers",,Mark de Wever,,|Complete|,Clang 14 `[format.arg.store] `_,"Class template format-arg-store",,Mark de Wever,`D103357 `__,|Complete|,Clang 14 `[format.formatter.spec] `_,"Formatter specializations - character types",,Mark de Wever,"`D96664 `__ `D103466 `__",|Complete|,Clang 14 `[format.formatter.spec] `_,"Formatter specializations - string types",,Mark de Wever,"`D96664 `__ `D103425 `__",|Complete|,Clang 14 `[format.formatter.spec] `_,"Formatter specializations - boolean type",,Mark de Wever,"`D96664 `__ `D103670 `__",|Complete|,Clang 14 `[format.formatter.spec] `_,"Formatter specializations - integral types",,Mark de Wever,"`D96664 `__ `D103433 `__",|Complete|,Clang 14 -`[format.formatter.spec] `_,"Formatter specializations - floating-point types",`D70631 `__,Mark de Wever,`D114001 `__,|Review|, -`[format.formatter.spec] `_,"Formatter specializations - pointer types",,Mark de Wever,,|In Progress|, +`[format.formatter.spec] `_,"Formatter specializations - floating-point types",`D70631 `__,Mark de Wever,`D114001 `__,|Complete|,Clang 14 +`[format.formatter.spec] `_,"Formatter specializations - pointer types",,Mark de Wever,,|Complete|,Clang 14 `[format.string.std] `_,"Standard format specifiers - character types",,Mark de Wever,`D103368 `__,|Complete|,Clang 14 `[format.string.std] `_,"Standard format specifiers - string 
types",`D103379 `__,Mark de Wever,"`D103368 `__ `D103413 `__",|Complete|,Clang 14 `[format.string.std] `_,"Standard format specifiers - boolean type",`D103379 `__,Mark de Wever,"`D103368 `__ `D103413 `__",|Complete|,Clang 14 `[format.string.std] `_,"Standard format specifiers - integral types",,Mark de Wever,`D103368 `__,|Complete|,Clang 14 -`[format.string.std] `_,"Standard format specifiers - floating-point types",,Mark de Wever,`D114001 `__,|Review|, -`[format.string.std] `_,"Standard format specifiers - pointer types",,Mark de Wever,,|In Progress|, +`[format.string.std] `_,"Standard format specifiers - floating-point types",,Mark de Wever,`D114001 `__,|Complete|,Clang 14 +`[format.string.std] `_,"Standard format specifiers - pointer types",,Mark de Wever,,|Complete|,Clang 14 `[format.functions] `_,"Format functions - format(string_view fmt, const Args&... args);",,Mark de Wever,`D96664 `__,|Complete|,Clang 14 `[format.functions] `_,"Format functions - format(wstring_view fmt, const Args&... args);",,Mark de Wever,`D96664 `__,|Complete|,Clang 14 `[format.functions] `_,"Format functions - format(const locale& loc, string_view fmt, const Args&... 
args);",,Mark de Wever,`D96664 `__,|Complete|,Clang 14 diff --git a/libcxx/test/std/containers/sequences/vector.bool/get_allocator.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/get_allocator.pass.cpp --- a/libcxx/test/std/containers/sequences/vector.bool/get_allocator.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector.bool/get_allocator.pass.cpp @@ -20,13 +20,13 @@ int main(int, char**) { { - std::allocator alloc; + std::allocator alloc; const std::vector vb(alloc); assert(vb.get_allocator() == alloc); } { - other_allocator alloc(1); - const std::vector > vb(alloc); + other_allocator alloc(1); + const std::vector > vb(alloc); assert(vb.get_allocator() == alloc); } diff --git a/libcxx/test/std/utilities/format/format.functions/format.locale.pass.cpp b/libcxx/test/std/utilities/format/format.functions/format.locale.pass.cpp --- a/libcxx/test/std/utilities/format/format.functions/format.locale.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/format.locale.pass.cpp @@ -27,33 +27,31 @@ #include "test_macros.h" #include "format_tests.h" -auto test = [](std::basic_string expected, - std::basic_string fmt, +auto test = [](std::basic_string expected, std::basic_string fmt, const Args&... args) { std::basic_string out = std::format(std::locale(), fmt, args...); if constexpr (std::same_as) if (out != expected) - std::cerr << "\nFormat string " << fmt << "\nExpected output " - << expected << "\nActual output " << out << '\n'; + std::cerr << "\nFormat string " << fmt << "\nExpected output " << expected << "\nActual output " << out + << '\n'; assert(out == expected); }; -auto test_exception = []( - std::string_view what, std::basic_string fmt, const Args&... args) { +auto test_exception = + [](std::string_view what, std::basic_string fmt, const Args&... 
args) { #ifndef TEST_HAS_NO_EXCEPTIONS try { std::format(std::locale(), fmt, args...); if constexpr (std::same_as) - std::cerr << "\nFormat string " << fmt - << "\nDidn't throw an exception.\n"; + std::cerr << "\nFormat string " << fmt << "\nDidn't throw an exception.\n"; assert(false); } catch (std::format_error& e) { -#ifdef _LIBCPP_VERSION +# ifdef _LIBCPP_VERSION if constexpr (std::same_as) if (e.what() != what) - std::cerr << "\nFormat string " << fmt << "\nExpected exception " - << what << "\nActual exception " << e.what() << '\n'; -#endif + std::cerr << "\nFormat string " << fmt << "\nExpected exception " << what << "\nActual exception " + << e.what() << '\n'; +# endif LIBCPP_ASSERT(e.what() == what); return; } diff --git a/libcxx/test/std/utilities/format/format.functions/format.pass.cpp b/libcxx/test/std/utilities/format/format.functions/format.pass.cpp --- a/libcxx/test/std/utilities/format/format.functions/format.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/format.pass.cpp @@ -25,44 +25,42 @@ #include #include #ifndef _LIBCPP_HAS_NO_LOCALIZATION -#include +# include #endif #include #include "test_macros.h" #include "format_tests.h" -auto test = [](std::basic_string expected, - std::basic_string fmt, +auto test = [](std::basic_string expected, std::basic_string fmt, const Args&... args) { std::basic_string out = std::format(fmt, args...); #ifndef _LIBCPP_HAS_NO_LOCALIZATION if constexpr (std::same_as) if (out != expected) - std::cerr << "\nFormat string " << fmt << "\nExpected output " - << expected << "\nActual output " << out << '\n'; + std::cerr << "\nFormat string " << fmt << "\nExpected output " << expected << "\nActual output " << out + << '\n'; #endif assert(out == expected); }; -auto test_exception = []( - std::string_view what, std::basic_string fmt, const Args&... args) { +auto test_exception = + [](std::string_view what, std::basic_string fmt, const Args&... 
args) { #ifndef TEST_HAS_NO_EXCEPTIONS try { std::format(fmt, args...); -#ifndef _LIBCPP_HAS_NO_LOCALIZATION +# ifndef _LIBCPP_HAS_NO_LOCALIZATION if constexpr (std::same_as) - std::cerr << "\nFormat string " << fmt - << "\nDidn't throw an exception.\n"; -#endif + std::cerr << "\nFormat string " << fmt << "\nDidn't throw an exception.\n"; +# endif assert(false); } catch (std::format_error& e) { -#if defined(_LIBCPP_VERSION) && !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# if defined(_LIBCPP_VERSION) && !defined(_LIBCPP_HAS_NO_LOCALIZATION) if constexpr (std::same_as) if (e.what() != what) - std::cerr << "\nFormat string " << fmt << "\nExpected exception " - << what << "\nActual exception " << e.what() << '\n'; -#endif + std::cerr << "\nFormat string " << fmt << "\nExpected exception " << what << "\nActual exception " + << e.what() << '\n'; +# endif LIBCPP_ASSERT(e.what() == what); return; } diff --git a/libcxx/test/std/utilities/format/format.functions/format_tests.h b/libcxx/test/std/utilities/format/format.functions/format_tests.h --- a/libcxx/test/std/utilities/format/format.functions/format_tests.h +++ b/libcxx/test/std/utilities/format/format.functions/format_tests.h @@ -134,10 +134,10 @@ std::vector> invalid_types(std::string valid) { std::vector> result; -#define CASE(T) \ - case #T[0]: \ - result.push_back(STR("Invalid formatter type {:" #T "}")); \ - break; +#define CASE(T) \ +case #T[0]: \ + result.push_back(STR("Invalid formatter type {:" #T "}")); \ + break; for (auto type : "aAbBcdeEfFgGopsxX") { if (valid.find(type) != std::string::npos) @@ -173,18 +173,15 @@ } template -void format_test_string(T world, T universe, TestFunction check, - ExceptionTest check_exception) { +void format_test_string(T world, T universe, TestFunction check, ExceptionTest check_exception) { // *** Valid input tests *** // Unsed argument is ignored. TODO FMT what does the Standard mandate? 
check(STR("hello world"), STR("hello {}"), world, universe); - check(STR("hello world and universe"), STR("hello {} and {}"), world, - universe); + check(STR("hello world and universe"), STR("hello {} and {}"), world, universe); check(STR("hello world"), STR("hello {0}"), world, universe); check(STR("hello universe"), STR("hello {1}"), world, universe); - check(STR("hello universe and world"), STR("hello {1} and {0}"), world, - universe); + check(STR("hello universe and world"), STR("hello {1} and {0}"), world, universe); check(STR("hello world"), STR("hello {:_>}"), world); check(STR("hello world"), STR("hello {:>8}"), world); @@ -225,97 +222,69 @@ check(STR("hello uni#####"), STR("hello {:#<8.3s}"), universe); // *** sign *** - check_exception("The format-spec should consume the input or end with a '}'", - STR("hello {:-}"), world); + check_exception("The format-spec should consume the input or end with a '}'", STR("hello {:-}"), world); // *** alternate form *** - check_exception("The format-spec should consume the input or end with a '}'", - STR("hello {:#}"), world); + check_exception("The format-spec should consume the input or end with a '}'", STR("hello {:#}"), world); // *** zero-padding *** - check_exception("A format-spec width field shouldn't have a leading zero", - STR("hello {:0}"), world); + check_exception("A format-spec width field shouldn't have a leading zero", STR("hello {:0}"), world); // *** width *** #ifdef _LIBCPP_VERSION // This limit isn't specified in the Standard. 
- static_assert(std::__format::__number_max == 2'147'483'647, - "Update the assert and the test."); - check_exception("The numeric value of the format-spec is too large", - STR("{:2147483648}"), world); - check_exception("The numeric value of the format-spec is too large", - STR("{:5000000000}"), world); - check_exception("The numeric value of the format-spec is too large", - STR("{:10000000000}"), world); + static_assert(std::__format::__number_max == 2'147'483'647, "Update the assert and the test."); + check_exception("The numeric value of the format-spec is too large", STR("{:2147483648}"), world); + check_exception("The numeric value of the format-spec is too large", STR("{:5000000000}"), world); + check_exception("The numeric value of the format-spec is too large", STR("{:10000000000}"), world); #endif - check_exception( - "A format-spec width field replacement should have a positive value", - STR("hello {:{}}"), world, 0); - check_exception( - "A format-spec arg-id replacement shouldn't have a negative value", - STR("hello {:{}}"), world, -1); - check_exception( - "A format-spec arg-id replacement exceeds the maximum supported value", - STR("hello {:{}}"), world, unsigned(-1)); + check_exception("A format-spec width field replacement should have a positive value", STR("hello {:{}}"), world, 0); + check_exception("A format-spec arg-id replacement shouldn't have a negative value", STR("hello {:{}}"), world, -1); + check_exception("A format-spec arg-id replacement exceeds the maximum supported value", STR("hello {:{}}"), world, + unsigned(-1)); check_exception("Argument index out of bounds", STR("hello {:{}}"), world); - check_exception( - "A format-spec arg-id replacement argument isn't an integral type", - STR("hello {:{}}"), world, universe); - check_exception( - "Using manual argument numbering in automatic argument numbering mode", - STR("hello {:{0}}"), world, 1); - check_exception( - "Using automatic argument numbering in manual argument numbering mode", 
- STR("hello {0:{}}"), world, 1); + check_exception("A format-spec arg-id replacement argument isn't an integral type", STR("hello {:{}}"), world, + universe); + check_exception("Using manual argument numbering in automatic argument numbering mode", STR("hello {:{0}}"), world, + 1); + check_exception("Using automatic argument numbering in manual argument numbering mode", STR("hello {0:{}}"), world, + 1); // Arg-id may not have leading zeros. check_exception("Invalid arg-id", STR("hello {0:{01}}"), world, 1); // *** precision *** #ifdef _LIBCPP_VERSION // This limit isn't specified in the Standard. - static_assert(std::__format::__number_max == 2'147'483'647, - "Update the assert and the test."); - check_exception("The numeric value of the format-spec is too large", - STR("{:.2147483648}"), world); - check_exception("The numeric value of the format-spec is too large", - STR("{:.5000000000}"), world); - check_exception("The numeric value of the format-spec is too large", - STR("{:.10000000000}"), world); + static_assert(std::__format::__number_max == 2'147'483'647, "Update the assert and the test."); + check_exception("The numeric value of the format-spec is too large", STR("{:.2147483648}"), world); + check_exception("The numeric value of the format-spec is too large", STR("{:.5000000000}"), world); + check_exception("The numeric value of the format-spec is too large", STR("{:.10000000000}"), world); #endif // Precision 0 allowed, but not useful for string arguments. check(STR("hello "), STR("hello {:.{}}"), world, 0); // Precision may have leading zeros. Secondly tests the value is still base 10. 
check(STR("hello 0123456789"), STR("hello {:.000010}"), STR("0123456789abcdef")); - check_exception( - "A format-spec arg-id replacement shouldn't have a negative value", - STR("hello {:.{}}"), world, -1); - check_exception( - "A format-spec arg-id replacement exceeds the maximum supported value", - STR("hello {:.{}}"), world, ~0u); + check_exception("A format-spec arg-id replacement shouldn't have a negative value", STR("hello {:.{}}"), world, -1); + check_exception("A format-spec arg-id replacement exceeds the maximum supported value", STR("hello {:.{}}"), world, + ~0u); check_exception("Argument index out of bounds", STR("hello {:.{}}"), world); - check_exception( - "A format-spec arg-id replacement argument isn't an integral type", - STR("hello {:.{}}"), world, universe); - check_exception( - "Using manual argument numbering in automatic argument numbering mode", - STR("hello {:.{0}}"), world, 1); - check_exception( - "Using automatic argument numbering in manual argument numbering mode", - STR("hello {0:.{}}"), world, 1); + check_exception("A format-spec arg-id replacement argument isn't an integral type", STR("hello {:.{}}"), world, + universe); + check_exception("Using manual argument numbering in automatic argument numbering mode", STR("hello {:.{0}}"), world, + 1); + check_exception("Using automatic argument numbering in manual argument numbering mode", STR("hello {0:.{}}"), world, + 1); // Arg-id may not have leading zeros. 
check_exception("Invalid arg-id", STR("hello {0:.{01}}"), world, 1); // *** locale-specific form *** - check_exception("The format-spec should consume the input or end with a '}'", - STR("hello {:L}"), world); + check_exception("The format-spec should consume the input or end with a '}'", STR("hello {:L}"), world); // *** type *** for (const auto& fmt : invalid_types("s")) - check_exception( - "The format-spec type has a type not supported for a string argument", - fmt, world); + check_exception("The format-spec type has a type not supported for a string argument", fmt, world); } template @@ -364,13 +333,10 @@ // Testing the char const[] is a bit tricky due to array to pointer decay. // Since there are separate tests in format.formatter.spec the array is not // tested here. - format_test_string(world.c_str(), universe.c_str(), check, + format_test_string(world.c_str(), universe.c_str(), check, check_exception); + format_test_string(const_cast(world.c_str()), const_cast(universe.c_str()), check, check_exception); - format_test_string(const_cast(world.c_str()), - const_cast(universe.c_str()), check, - check_exception); - format_test_string(std::basic_string_view(world), - std::basic_string_view(universe), check, + format_test_string(std::basic_string_view(world), std::basic_string_view(universe), check, check_exception); format_test_string(world, universe, check, check_exception); format_test_string_unicode(check); @@ -399,60 +365,41 @@ check(STR("answer is '-false--'"), STR("answer is '{:-^8s}'"), false); // *** Sign *** - check_exception("A sign field isn't allowed in this format-spec", STR("{:-}"), - true); - check_exception("A sign field isn't allowed in this format-spec", STR("{:+}"), - true); - check_exception("A sign field isn't allowed in this format-spec", STR("{: }"), - true); - - check_exception("A sign field isn't allowed in this format-spec", - STR("{:-s}"), true); - check_exception("A sign field isn't allowed in this format-spec", - STR("{:+s}"), true); 
- check_exception("A sign field isn't allowed in this format-spec", - STR("{: s}"), true); + check_exception("A sign field isn't allowed in this format-spec", STR("{:-}"), true); + check_exception("A sign field isn't allowed in this format-spec", STR("{:+}"), true); + check_exception("A sign field isn't allowed in this format-spec", STR("{: }"), true); + + check_exception("A sign field isn't allowed in this format-spec", STR("{:-s}"), true); + check_exception("A sign field isn't allowed in this format-spec", STR("{:+s}"), true); + check_exception("A sign field isn't allowed in this format-spec", STR("{: s}"), true); // *** alternate form *** - check_exception("An alternate form field isn't allowed in this format-spec", - STR("{:#}"), true); - check_exception("An alternate form field isn't allowed in this format-spec", - STR("{:#s}"), true); + check_exception("An alternate form field isn't allowed in this format-spec", STR("{:#}"), true); + check_exception("An alternate form field isn't allowed in this format-spec", STR("{:#s}"), true); // *** zero-padding *** - check_exception("A zero-padding field isn't allowed in this format-spec", - STR("{:0}"), true); - check_exception("A zero-padding field isn't allowed in this format-spec", - STR("{:0s}"), true); + check_exception("A zero-padding field isn't allowed in this format-spec", STR("{:0}"), true); + check_exception("A zero-padding field isn't allowed in this format-spec", STR("{:0s}"), true); // *** precision *** - check_exception("The format-spec should consume the input or end with a '}'", - STR("{:.}"), true); - check_exception("The format-spec should consume the input or end with a '}'", - STR("{:.0}"), true); - check_exception("The format-spec should consume the input or end with a '}'", - STR("{:.42}"), true); - - check_exception("The format-spec should consume the input or end with a '}'", - STR("{:.s}"), true); - check_exception("The format-spec should consume the input or end with a '}'", - STR("{:.0s}"), 
true); - check_exception("The format-spec should consume the input or end with a '}'", - STR("{:.42s}"), true); + check_exception("The format-spec should consume the input or end with a '}'", STR("{:.}"), true); + check_exception("The format-spec should consume the input or end with a '}'", STR("{:.0}"), true); + check_exception("The format-spec should consume the input or end with a '}'", STR("{:.42}"), true); + + check_exception("The format-spec should consume the input or end with a '}'", STR("{:.s}"), true); + check_exception("The format-spec should consume the input or end with a '}'", STR("{:.0s}"), true); + check_exception("The format-spec should consume the input or end with a '}'", STR("{:.42s}"), true); // *** locale-specific form *** // See locale-specific_form.pass.cpp // *** type *** for (const auto& fmt : invalid_types("bBcdosxX")) - check_exception( - "The format-spec type has a type not supported for a bool argument", - fmt, true); + check_exception("The format-spec type has a type not supported for a bool argument", fmt, true); } template -void format_test_bool_as_char(TestFunction check, - ExceptionTest check_exception) { +void format_test_bool_as_char(TestFunction check, ExceptionTest check_exception) { // *** align-fill & width *** check(STR("answer is '\1 '"), STR("answer is '{:6c}'"), true); check(STR("answer is ' \1'"), STR("answer is '{:>6c}'"), true); @@ -463,47 +410,31 @@ check(STR("answer is '\1-----'"), STR("answer is '{:-<6c}'"), true); check(STR("answer is '--\1---'"), STR("answer is '{:-^6c}'"), true); - check(std::basic_string(CSTR("answer is '\0 '"), 18), - STR("answer is '{:6c}'"), false); - check(std::basic_string(CSTR("answer is '\0 '"), 18), - STR("answer is '{:6c}'"), false); - check(std::basic_string(CSTR("answer is ' \0'"), 18), - STR("answer is '{:>6c}'"), false); - check(std::basic_string(CSTR("answer is '\0 '"), 18), - STR("answer is '{:<6c}'"), false); - check(std::basic_string(CSTR("answer is ' \0 '"), 18), - STR("answer 
is '{:^6c}'"), false); - - check(std::basic_string(CSTR("answer is '-----\0'"), 18), - STR("answer is '{:->6c}'"), false); - check(std::basic_string(CSTR("answer is '\0-----'"), 18), - STR("answer is '{:-<6c}'"), false); - check(std::basic_string(CSTR("answer is '--\0---'"), 18), - STR("answer is '{:-^6c}'"), false); + check(std::basic_string(CSTR("answer is '\0 '"), 18), STR("answer is '{:6c}'"), false); + check(std::basic_string(CSTR("answer is '\0 '"), 18), STR("answer is '{:6c}'"), false); + check(std::basic_string(CSTR("answer is ' \0'"), 18), STR("answer is '{:>6c}'"), false); + check(std::basic_string(CSTR("answer is '\0 '"), 18), STR("answer is '{:<6c}'"), false); + check(std::basic_string(CSTR("answer is ' \0 '"), 18), STR("answer is '{:^6c}'"), false); + + check(std::basic_string(CSTR("answer is '-----\0'"), 18), STR("answer is '{:->6c}'"), false); + check(std::basic_string(CSTR("answer is '\0-----'"), 18), STR("answer is '{:-<6c}'"), false); + check(std::basic_string(CSTR("answer is '--\0---'"), 18), STR("answer is '{:-^6c}'"), false); // *** Sign *** - check_exception("A sign field isn't allowed in this format-spec", - STR("{:-c}"), true); - check_exception("A sign field isn't allowed in this format-spec", - STR("{:+c}"), true); - check_exception("A sign field isn't allowed in this format-spec", - STR("{: c}"), true); + check_exception("A sign field isn't allowed in this format-spec", STR("{:-c}"), true); + check_exception("A sign field isn't allowed in this format-spec", STR("{:+c}"), true); + check_exception("A sign field isn't allowed in this format-spec", STR("{: c}"), true); // *** alternate form *** - check_exception("An alternate form field isn't allowed in this format-spec", - STR("{:#c}"), true); + check_exception("An alternate form field isn't allowed in this format-spec", STR("{:#c}"), true); // *** zero-padding *** - check_exception("A zero-padding field isn't allowed in this format-spec", - STR("{:0c}"), true); + check_exception("A 
zero-padding field isn't allowed in this format-spec", STR("{:0c}"), true); // *** precision *** - check_exception("The format-spec should consume the input or end with a '}'", - STR("{:.c}"), true); - check_exception("The format-spec should consume the input or end with a '}'", - STR("{:.0c}"), true); - check_exception("The format-spec should consume the input or end with a '}'", - STR("{:.42c}"), true); + check_exception("The format-spec should consume the input or end with a '}'", STR("{:.c}"), true); + check_exception("The format-spec should consume the input or end with a '}'", STR("{:.0c}"), true); + check_exception("The format-spec should consume the input or end with a '}'", STR("{:.42c}"), true); // *** locale-specific form *** // Note it has no effect but it's allowed. @@ -511,14 +442,11 @@ // *** type *** for (const auto& fmt : invalid_types("bBcdosxX")) - check_exception( - "The format-spec type has a type not supported for a bool argument", - fmt, true); + check_exception("The format-spec type has a type not supported for a bool argument", fmt, true); } template -void format_test_bool_as_integer(TestFunction check, - ExceptionTest check_exception) { +void format_test_bool_as_integer(TestFunction check, ExceptionTest check_exception) { // *** align-fill & width *** check(STR("answer is '1'"), STR("answer is '{:<1d}'"), true); check(STR("answer is '1 '"), STR("answer is '{:<2d}'"), true); @@ -591,26 +519,20 @@ check(STR("answer is 0X0000000000"), STR("answer is {:#012X}"), false); // *** precision *** - check_exception("The format-spec should consume the input or end with a '}'", - STR("{:.}"), true); - check_exception("The format-spec should consume the input or end with a '}'", - STR("{:.0}"), true); - check_exception("The format-spec should consume the input or end with a '}'", - STR("{:.42}"), true); + check_exception("The format-spec should consume the input or end with a '}'", STR("{:.}"), true); + check_exception("The format-spec should consume 
the input or end with a '}'", STR("{:.0}"), true); + check_exception("The format-spec should consume the input or end with a '}'", STR("{:.42}"), true); // *** locale-specific form *** // See locale-specific_form.pass.cpp // *** type *** for (const auto& fmt : invalid_types("bBcdosxX")) - check_exception( - "The format-spec type has a type not supported for a bool argument", - fmt, true); + check_exception("The format-spec type has a type not supported for a bool argument", fmt, true); } template -void format_test_integer_as_integer(TestFunction check, - ExceptionTest check_exception) { +void format_test_integer_as_integer(TestFunction check, ExceptionTest check_exception) { // *** align-fill & width *** check(STR("answer is '42'"), STR("answer is '{:<1}'"), I(42)); check(STR("answer is '42'"), STR("answer is '{:<2}'"), I(42)); @@ -729,26 +651,20 @@ check(STR("answer is +0X00000002A"), STR("answer is {:+#012X}"), I(42)); // *** precision *** - check_exception("The format-spec should consume the input or end with a '}'", - STR("{:.}"), I(0)); - check_exception("The format-spec should consume the input or end with a '}'", - STR("{:.0}"), I(0)); - check_exception("The format-spec should consume the input or end with a '}'", - STR("{:.42}"), I(0)); + check_exception("The format-spec should consume the input or end with a '}'", STR("{:.}"), I(0)); + check_exception("The format-spec should consume the input or end with a '}'", STR("{:.0}"), I(0)); + check_exception("The format-spec should consume the input or end with a '}'", STR("{:.42}"), I(0)); // *** locale-specific form *** // See locale-specific_form.pass.cpp // *** type *** for (const auto& fmt : invalid_types("bBcdoxX")) - check_exception( - "The format-spec type has a type not supported for an integer argument", - fmt, 42); + check_exception("The format-spec type has a type not supported for an integer argument", fmt, 42); } template -void format_test_integer_as_char(TestFunction check, - ExceptionTest 
check_exception) { +void format_test_integer_as_char(TestFunction check, ExceptionTest check_exception) { // *** align-fill & width *** check(STR("answer is '* '"), STR("answer is '{:6c}'"), I(42)); check(STR("answer is ' *'"), STR("answer is '{:>6c}'"), I(42)); @@ -761,28 +677,20 @@ // *** Sign *** check(STR("answer is *"), STR("answer is {:c}"), I(42)); - check_exception("A sign field isn't allowed in this format-spec", - STR("answer is {:-c}"), I(42)); - check_exception("A sign field isn't allowed in this format-spec", - STR("answer is {:+c}"), I(42)); - check_exception("A sign field isn't allowed in this format-spec", - STR("answer is {: c}"), I(42)); + check_exception("A sign field isn't allowed in this format-spec", STR("answer is {:-c}"), I(42)); + check_exception("A sign field isn't allowed in this format-spec", STR("answer is {:+c}"), I(42)); + check_exception("A sign field isn't allowed in this format-spec", STR("answer is {: c}"), I(42)); // *** alternate form *** - check_exception("An alternate form field isn't allowed in this format-spec", - STR("answer is {:#c}"), I(42)); + check_exception("An alternate form field isn't allowed in this format-spec", STR("answer is {:#c}"), I(42)); // *** zero-padding & width *** - check_exception("A zero-padding field isn't allowed in this format-spec", - STR("answer is {:01c}"), I(42)); + check_exception("A zero-padding field isn't allowed in this format-spec", STR("answer is {:01c}"), I(42)); // *** precision *** - check_exception("The format-spec should consume the input or end with a '}'", - STR("{:.c}"), I(0)); - check_exception("The format-spec should consume the input or end with a '}'", - STR("{:.0c}"), I(0)); - check_exception("The format-spec should consume the input or end with a '}'", - STR("{:.42c}"), I(0)); + check_exception("The format-spec should consume the input or end with a '}'", STR("{:.c}"), I(0)); + check_exception("The format-spec should consume the input or end with a '}'", STR("{:.0c}"), 
I(0)); + check_exception("The format-spec should consume the input or end with a '}'", STR("{:.42c}"), I(0)); // *** locale-specific form *** // Note it has no effect but it's allowed. @@ -790,9 +698,7 @@ // *** type *** for (const auto& fmt : invalid_types("bBcdoxX")) - check_exception( - "The format-spec type has a type not supported for an integer argument", - fmt, I(42)); + check_exception("The format-spec type has a type not supported for an integer argument", fmt, I(42)); // *** Validate range *** // TODO FMT Update test after adding 128-bit support. @@ -800,18 +706,16 @@ // The code has some duplications to keep the if statement readable. if constexpr (std::signed_integral) { if constexpr (std::signed_integral && sizeof(I) > sizeof(CharT)) { - check_exception("Integral value outside the range of the char type", - STR("{:c}"), std::numeric_limits::min()); - check_exception("Integral value outside the range of the char type", - STR("{:c}"), std::numeric_limits::max()); - } else if constexpr (std::unsigned_integral && - sizeof(I) >= sizeof(CharT)) { - check_exception("Integral value outside the range of the char type", - STR("{:c}"), std::numeric_limits::max()); + check_exception("Integral value outside the range of the char type", STR("{:c}"), + std::numeric_limits::min()); + check_exception("Integral value outside the range of the char type", STR("{:c}"), + std::numeric_limits::max()); + } else if constexpr (std::unsigned_integral && sizeof(I) >= sizeof(CharT)) { + check_exception("Integral value outside the range of the char type", STR("{:c}"), + std::numeric_limits::max()); } } else if constexpr (sizeof(I) > sizeof(CharT)) { - check_exception("Integral value outside the range of the char type", - STR("{:c}"), std::numeric_limits::max()); + check_exception("Integral value outside the range of the char type", STR("{:c}"), std::numeric_limits::max()); } } } @@ -823,8 +727,7 @@ } template -void format_test_signed_integer(TestFunction check, - ExceptionTest 
check_exception) { +void format_test_signed_integer(TestFunction check, ExceptionTest check_exception) { format_test_integer(check, check_exception); format_test_integer(check, check_exception); format_test_integer(check, check_exception); @@ -839,62 +742,49 @@ check(STR("-128"), STR("{:#}"), std::numeric_limits::min()); check(STR("-0x80"), STR("{:#x}"), std::numeric_limits::min()); - check(STR("-0b1000000000000000"), STR("{:#b}"), - std::numeric_limits::min()); + check(STR("-0b1000000000000000"), STR("{:#b}"), std::numeric_limits::min()); check(STR("-0100000"), STR("{:#o}"), std::numeric_limits::min()); check(STR("-32768"), STR("{:#}"), std::numeric_limits::min()); check(STR("-0x8000"), STR("{:#x}"), std::numeric_limits::min()); - check(STR("-0b10000000000000000000000000000000"), STR("{:#b}"), - std::numeric_limits::min()); - check(STR("-020000000000"), STR("{:#o}"), - std::numeric_limits::min()); + check(STR("-0b10000000000000000000000000000000"), STR("{:#b}"), std::numeric_limits::min()); + check(STR("-020000000000"), STR("{:#o}"), std::numeric_limits::min()); check(STR("-2147483648"), STR("{:#}"), std::numeric_limits::min()); check(STR("-0x80000000"), STR("{:#x}"), std::numeric_limits::min()); check(STR("-0b100000000000000000000000000000000000000000000000000000000000000" "0"), STR("{:#b}"), std::numeric_limits::min()); - check(STR("-01000000000000000000000"), STR("{:#o}"), - std::numeric_limits::min()); - check(STR("-9223372036854775808"), STR("{:#}"), - std::numeric_limits::min()); - check(STR("-0x8000000000000000"), STR("{:#x}"), - std::numeric_limits::min()); + check(STR("-01000000000000000000000"), STR("{:#o}"), std::numeric_limits::min()); + check(STR("-9223372036854775808"), STR("{:#}"), std::numeric_limits::min()); + check(STR("-0x8000000000000000"), STR("{:#x}"), std::numeric_limits::min()); check(STR("0b1111111"), STR("{:#b}"), std::numeric_limits::max()); check(STR("0177"), STR("{:#o}"), std::numeric_limits::max()); check(STR("127"), STR("{:#}"), 
std::numeric_limits::max()); check(STR("0x7f"), STR("{:#x}"), std::numeric_limits::max()); - check(STR("0b111111111111111"), STR("{:#b}"), - std::numeric_limits::max()); + check(STR("0b111111111111111"), STR("{:#b}"), std::numeric_limits::max()); check(STR("077777"), STR("{:#o}"), std::numeric_limits::max()); check(STR("32767"), STR("{:#}"), std::numeric_limits::max()); check(STR("0x7fff"), STR("{:#x}"), std::numeric_limits::max()); - check(STR("0b1111111111111111111111111111111"), STR("{:#b}"), - std::numeric_limits::max()); + check(STR("0b1111111111111111111111111111111"), STR("{:#b}"), std::numeric_limits::max()); check(STR("017777777777"), STR("{:#o}"), std::numeric_limits::max()); check(STR("2147483647"), STR("{:#}"), std::numeric_limits::max()); check(STR("0x7fffffff"), STR("{:#x}"), std::numeric_limits::max()); - check( - STR("0b111111111111111111111111111111111111111111111111111111111111111"), - STR("{:#b}"), std::numeric_limits::max()); - check(STR("0777777777777777777777"), STR("{:#o}"), - std::numeric_limits::max()); - check(STR("9223372036854775807"), STR("{:#}"), - std::numeric_limits::max()); - check(STR("0x7fffffffffffffff"), STR("{:#x}"), + check(STR("0b111111111111111111111111111111111111111111111111111111111111111"), STR("{:#b}"), std::numeric_limits::max()); + check(STR("0777777777777777777777"), STR("{:#o}"), std::numeric_limits::max()); + check(STR("9223372036854775807"), STR("{:#}"), std::numeric_limits::max()); + check(STR("0x7fffffffffffffff"), STR("{:#x}"), std::numeric_limits::max()); // TODO FMT Add __int128_t test after implementing full range. 
} template -void format_test_unsigned_integer(TestFunction check, - ExceptionTest check_exception) { +void format_test_unsigned_integer(TestFunction check, ExceptionTest check_exception) { format_test_integer(check, check_exception); format_test_integer(check, check_exception); format_test_integer(check, check_exception); @@ -909,28 +799,21 @@ check(STR("255"), STR("{:#}"), std::numeric_limits::max()); check(STR("0xff"), STR("{:#x}"), std::numeric_limits::max()); - check(STR("0b1111111111111111"), STR("{:#b}"), - std::numeric_limits::max()); + check(STR("0b1111111111111111"), STR("{:#b}"), std::numeric_limits::max()); check(STR("0177777"), STR("{:#o}"), std::numeric_limits::max()); check(STR("65535"), STR("{:#}"), std::numeric_limits::max()); check(STR("0xffff"), STR("{:#x}"), std::numeric_limits::max()); - check(STR("0b11111111111111111111111111111111"), STR("{:#b}"), - std::numeric_limits::max()); - check(STR("037777777777"), STR("{:#o}"), - std::numeric_limits::max()); + check(STR("0b11111111111111111111111111111111"), STR("{:#b}"), std::numeric_limits::max()); + check(STR("037777777777"), STR("{:#o}"), std::numeric_limits::max()); check(STR("4294967295"), STR("{:#}"), std::numeric_limits::max()); check(STR("0xffffffff"), STR("{:#x}"), std::numeric_limits::max()); - check( - STR("0b1111111111111111111111111111111111111111111111111111111111111111"), - STR("{:#b}"), std::numeric_limits::max()); - check(STR("01777777777777777777777"), STR("{:#o}"), - std::numeric_limits::max()); - check(STR("18446744073709551615"), STR("{:#}"), - std::numeric_limits::max()); - check(STR("0xffffffffffffffff"), STR("{:#x}"), + check(STR("0b1111111111111111111111111111111111111111111111111111111111111111"), STR("{:#b}"), std::numeric_limits::max()); + check(STR("01777777777777777777777"), STR("{:#o}"), std::numeric_limits::max()); + check(STR("18446744073709551615"), STR("{:#}"), std::numeric_limits::max()); + check(STR("0xffffffffffffffff"), STR("{:#x}"), std::numeric_limits::max()); 
// TODO FMT Add __uint128_t test after implementing full range. } @@ -959,46 +842,30 @@ check(STR("answer is '--*---'"), STR("answer is '{:-^6c}'"), CharT('*')); // *** Sign *** - check_exception("A sign field isn't allowed in this format-spec", STR("{:-}"), - CharT('*')); - check_exception("A sign field isn't allowed in this format-spec", STR("{:+}"), - CharT('*')); - check_exception("A sign field isn't allowed in this format-spec", STR("{: }"), - CharT('*')); - - check_exception("A sign field isn't allowed in this format-spec", - STR("{:-c}"), CharT('*')); - check_exception("A sign field isn't allowed in this format-spec", - STR("{:+c}"), CharT('*')); - check_exception("A sign field isn't allowed in this format-spec", - STR("{: c}"), CharT('*')); + check_exception("A sign field isn't allowed in this format-spec", STR("{:-}"), CharT('*')); + check_exception("A sign field isn't allowed in this format-spec", STR("{:+}"), CharT('*')); + check_exception("A sign field isn't allowed in this format-spec", STR("{: }"), CharT('*')); + + check_exception("A sign field isn't allowed in this format-spec", STR("{:-c}"), CharT('*')); + check_exception("A sign field isn't allowed in this format-spec", STR("{:+c}"), CharT('*')); + check_exception("A sign field isn't allowed in this format-spec", STR("{: c}"), CharT('*')); // *** alternate form *** - check_exception("An alternate form field isn't allowed in this format-spec", - STR("{:#}"), CharT('*')); - check_exception("An alternate form field isn't allowed in this format-spec", - STR("{:#c}"), CharT('*')); + check_exception("An alternate form field isn't allowed in this format-spec", STR("{:#}"), CharT('*')); + check_exception("An alternate form field isn't allowed in this format-spec", STR("{:#c}"), CharT('*')); // *** zero-padding *** - check_exception("A zero-padding field isn't allowed in this format-spec", - STR("{:0}"), CharT('*')); - check_exception("A zero-padding field isn't allowed in this format-spec", - STR("{:0c}"), 
CharT('*')); + check_exception("A zero-padding field isn't allowed in this format-spec", STR("{:0}"), CharT('*')); + check_exception("A zero-padding field isn't allowed in this format-spec", STR("{:0c}"), CharT('*')); // *** precision *** - check_exception("The format-spec should consume the input or end with a '}'", - STR("{:.}"), CharT('*')); - check_exception("The format-spec should consume the input or end with a '}'", - STR("{:.0}"), CharT('*')); - check_exception("The format-spec should consume the input or end with a '}'", - STR("{:.42}"), CharT('*')); - - check_exception("The format-spec should consume the input or end with a '}'", - STR("{:.c}"), CharT('*')); - check_exception("The format-spec should consume the input or end with a '}'", - STR("{:.0c}"), CharT('*')); - check_exception("The format-spec should consume the input or end with a '}'", - STR("{:.42c}"), CharT('*')); + check_exception("The format-spec should consume the input or end with a '}'", STR("{:.}"), CharT('*')); + check_exception("The format-spec should consume the input or end with a '}'", STR("{:.0}"), CharT('*')); + check_exception("The format-spec should consume the input or end with a '}'", STR("{:.42}"), CharT('*')); + + check_exception("The format-spec should consume the input or end with a '}'", STR("{:.c}"), CharT('*')); + check_exception("The format-spec should consume the input or end with a '}'", STR("{:.0c}"), CharT('*')); + check_exception("The format-spec should consume the input or end with a '}'", STR("{:.42c}"), CharT('*')); // *** locale-specific form *** // Note it has no effect but it's allowed. 
@@ -1007,14 +874,11 @@ // *** type *** for (const auto& fmt : invalid_types("bBcdoxX")) - check_exception( - "The format-spec type has a type not supported for a char argument", - fmt, CharT('*')); + check_exception("The format-spec type has a type not supported for a char argument", fmt, CharT('*')); } template -void format_test_char_as_integer(TestFunction check, - ExceptionTest check_exception) { +void format_test_char_as_integer(TestFunction check, ExceptionTest check_exception) { // *** align-fill & width *** check(STR("answer is '42'"), STR("answer is '{:<1d}'"), CharT('*')); @@ -1067,21 +931,16 @@ check(STR("answer is +0X00000002A"), STR("answer is {:+#012X}"), CharT('*')); // *** precision *** - check_exception("The format-spec should consume the input or end with a '}'", - STR("{:.d}"), CharT('*')); - check_exception("The format-spec should consume the input or end with a '}'", - STR("{:.0d}"), CharT('*')); - check_exception("The format-spec should consume the input or end with a '}'", - STR("{:.42d}"), CharT('*')); + check_exception("The format-spec should consume the input or end with a '}'", STR("{:.d}"), CharT('*')); + check_exception("The format-spec should consume the input or end with a '}'", STR("{:.0d}"), CharT('*')); + check_exception("The format-spec should consume the input or end with a '}'", STR("{:.42d}"), CharT('*')); // *** locale-specific form *** // See locale-specific_form.pass.cpp // *** type *** for (const auto& fmt : invalid_types("bBcdoxX")) - check_exception( - "The format-spec type has a type not supported for a char argument", - fmt, '*'); + check_exception("The format-spec type has a type not supported for a char argument", fmt, '*'); } template @@ -2650,42 +2509,34 @@ // ** Test invalid format strings *** check_exception("The format string terminates at a '{'", STR("{")); - check_exception("The replacement field misses a terminating '}'", STR("{:"), - 42); + check_exception("The replacement field misses a terminating '}'", 
STR("{:"), 42); - check_exception("The format string contains an invalid escape sequence", - STR("}")); - check_exception("The format string contains an invalid escape sequence", - STR("{:}-}"), 42); + check_exception("The format string contains an invalid escape sequence", STR("}")); + check_exception("The format string contains an invalid escape sequence", STR("{:}-}"), 42); - check_exception("The format string contains an invalid escape sequence", - STR("} ")); + check_exception("The format string contains an invalid escape sequence", STR("} ")); - check_exception( - "The arg-id of the format-spec starts with an invalid character", - STR("{-"), 42); + check_exception("The arg-id of the format-spec starts with an invalid character", STR("{-"), 42); check_exception("Argument index out of bounds", STR("hello {}")); check_exception("Argument index out of bounds", STR("hello {0}")); check_exception("Argument index out of bounds", STR("hello {1}"), 42); // *** Test char format argument *** // The `char` to `wchar_t` formatting is tested separately. 
- check(STR("hello 09azAZ!"), STR("hello {}{}{}{}{}{}{}"), CharT('0'), - CharT('9'), CharT('a'), CharT('z'), CharT('A'), CharT('Z'), CharT('!')); + check(STR("hello 09azAZ!"), STR("hello {}{}{}{}{}{}{}"), CharT('0'), CharT('9'), CharT('a'), CharT('z'), CharT('A'), + CharT('Z'), CharT('!')); format_test_char(check, check_exception); format_test_char_as_integer(check, check_exception); // *** Test string format argument *** { - CharT buffer[] = {CharT('0'), CharT('9'), CharT('a'), CharT('z'), - CharT('A'), CharT('Z'), CharT('!'), 0}; + CharT buffer[] = {CharT('0'), CharT('9'), CharT('a'), CharT('z'), CharT('A'), CharT('Z'), CharT('!'), 0}; CharT* data = buffer; check(STR("hello 09azAZ!"), STR("hello {}"), data); } { - CharT buffer[] = {CharT('0'), CharT('9'), CharT('a'), CharT('z'), - CharT('A'), CharT('Z'), CharT('!'), 0}; + CharT buffer[] = {CharT('0'), CharT('9'), CharT('a'), CharT('z'), CharT('A'), CharT('Z'), CharT('!'), 0}; const CharT* data = buffer; check(STR("hello 09azAZ!"), STR("hello {}"), data); } @@ -2718,20 +2569,14 @@ { // Note 128-bit support is only partly implemented test the range // conditions here. 
- std::basic_string min = - std::format(STR("{}"), std::numeric_limits::min()); - check(min, STR("{}"), - static_cast<__int128_t>(std::numeric_limits::min())); - std::basic_string max = - std::format(STR("{}"), std::numeric_limits::max()); - check(max, STR("{}"), - static_cast<__int128_t>(std::numeric_limits::max())); - check_exception( - "128-bit value is outside of implemented range", STR("{}"), - static_cast<__int128_t>(std::numeric_limits::min()) - 1); - check_exception( - "128-bit value is outside of implemented range", STR("{}"), - static_cast<__int128_t>(std::numeric_limits::max()) + 1); + std::basic_string min = std::format(STR("{}"), std::numeric_limits::min()); + check(min, STR("{}"), static_cast<__int128_t>(std::numeric_limits::min())); + std::basic_string max = std::format(STR("{}"), std::numeric_limits::max()); + check(max, STR("{}"), static_cast<__int128_t>(std::numeric_limits::max())); + check_exception("128-bit value is outside of implemented range", STR("{}"), + static_cast<__int128_t>(std::numeric_limits::min()) - 1); + check_exception("128-bit value is outside of implemented range", STR("{}"), + static_cast<__int128_t>(std::numeric_limits::max()) + 1); } #endif format_test_signed_integer(check, check_exception); @@ -2747,15 +2592,10 @@ { // Note 128-bit support is only partly implemented test the range // conditions here. 
- std::basic_string max = - std::format(STR("{}"), std::numeric_limits::max()); - check(max, STR("{}"), - static_cast<__uint128_t>( - std::numeric_limits::max())); + std::basic_string max = std::format(STR("{}"), std::numeric_limits::max()); + check(max, STR("{}"), static_cast<__uint128_t>(std::numeric_limits::max())); check_exception("128-bit value is outside of implemented range", STR("{}"), - static_cast<__uint128_t>( - std::numeric_limits::max()) + - 1); + static_cast<__uint128_t>(std::numeric_limits::max()) + 1); } #endif format_test_unsigned_integer(check, check_exception); diff --git a/libcxx/test/std/utilities/format/format.functions/format_to.locale.pass.cpp b/libcxx/test/std/utilities/format/format.functions/format_to.locale.pass.cpp --- a/libcxx/test/std/utilities/format/format.functions/format_to.locale.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/format_to.locale.pass.cpp @@ -30,8 +30,7 @@ #include "test_macros.h" #include "format_tests.h" -auto test = [](std::basic_string expected, - std::basic_string fmt, +auto test = [](std::basic_string expected, std::basic_string fmt, const Args&... args) { { std::basic_string out(expected.size(), CharT(' ')); @@ -42,14 +41,12 @@ { std::list out; std::format_to(std::back_inserter(out), std::locale(), fmt, args...); - assert( - std::equal(out.begin(), out.end(), expected.begin(), expected.end())); + assert(std::equal(out.begin(), out.end(), expected.begin(), expected.end())); } { std::vector out; std::format_to(std::back_inserter(out), std::locale(), fmt, args...); - assert( - std::equal(out.begin(), out.end(), expected.begin(), expected.end())); + assert(std::equal(out.begin(), out.end(), expected.begin(), expected.end())); } { assert(expected.size() < 4096 && "Update the size of the buffer."); @@ -61,8 +58,8 @@ } }; -auto test_exception = []( - std::string_view what, std::basic_string fmt, const Args&... 
args) { +auto test_exception = + [](std::string_view what, std::basic_string fmt, const Args&... args) { #ifndef TEST_HAS_NO_EXCEPTIONS try { std::basic_string out; diff --git a/libcxx/test/std/utilities/format/format.functions/format_to.pass.cpp b/libcxx/test/std/utilities/format/format.functions/format_to.pass.cpp --- a/libcxx/test/std/utilities/format/format.functions/format_to.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/format_to.pass.cpp @@ -31,8 +31,7 @@ #include "test_macros.h" #include "format_tests.h" -auto test = [](std::basic_string expected, - std::basic_string fmt, +auto test = [](std::basic_string expected, std::basic_string fmt, const Args&... args) { { std::basic_string out(expected.size(), CharT(' ')); @@ -43,14 +42,12 @@ { std::list out; std::format_to(std::back_inserter(out), fmt, args...); - assert( - std::equal(out.begin(), out.end(), expected.begin(), expected.end())); + assert(std::equal(out.begin(), out.end(), expected.begin(), expected.end())); } { std::vector out; std::format_to(std::back_inserter(out), fmt, args...); - assert( - std::equal(out.begin(), out.end(), expected.begin(), expected.end())); + assert(std::equal(out.begin(), out.end(), expected.begin(), expected.end())); } { assert(expected.size() < 4096 && "Update the size of the buffer."); @@ -62,8 +59,8 @@ } }; -auto test_exception = []( - std::string_view what, std::basic_string fmt, const Args&... args) { +auto test_exception = + [](std::string_view what, std::basic_string fmt, const Args&... 
args) { #ifndef TEST_HAS_NO_EXCEPTIONS try { std::basic_string out; diff --git a/libcxx/test/std/utilities/format/format.functions/format_to_n.locale.pass.cpp b/libcxx/test/std/utilities/format/format.functions/format_to_n.locale.pass.cpp --- a/libcxx/test/std/utilities/format/format.functions/format_to_n.locale.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/format_to_n.locale.pass.cpp @@ -32,13 +32,11 @@ #include "test_macros.h" #include "format_tests.h" -auto test = [](std::basic_string expected, - std::basic_string fmt, +auto test = [](std::basic_string expected, std::basic_string fmt, const Args&... args) { { std::list out; - std::format_to_n_result result = std::format_to_n( - std::back_inserter(out), 0, std::locale(), fmt, args...); + std::format_to_n_result result = std::format_to_n(std::back_inserter(out), 0, std::locale(), fmt, args...); // To avoid signedness warnings make sure formatted_size uses the same type // as result.size. using diff_type = decltype(result.size); @@ -49,20 +47,17 @@ } { std::vector out; - std::format_to_n_result result = std::format_to_n( - std::back_inserter(out), 5, std::locale(), fmt, args...); + std::format_to_n_result result = std::format_to_n(std::back_inserter(out), 5, std::locale(), fmt, args...); using diff_type = decltype(result.size); diff_type formatted_size = std::formatted_size(std::locale(), fmt, args...); diff_type size = std::min(5, formatted_size); assert(result.size == formatted_size); - assert(std::equal(out.begin(), out.end(), expected.begin(), - expected.begin() + size)); + assert(std::equal(out.begin(), out.end(), expected.begin(), expected.begin() + size)); } { std::basic_string out; - std::format_to_n_result result = std::format_to_n( - std::back_inserter(out), 1000, std::locale(), fmt, args...); + std::format_to_n_result result = std::format_to_n(std::back_inserter(out), 1000, std::locale(), fmt, args...); using diff_type = decltype(result.size); diff_type formatted_size = 
std::formatted_size(std::locale(), fmt, args...); diff_type size = std::min(1000, formatted_size); @@ -73,8 +68,7 @@ { // Test the returned iterator. std::basic_string out(10, CharT(' ')); - std::format_to_n_result result = - std::format_to_n(out.begin(), 10, std::locale(), fmt, args...); + std::format_to_n_result result = std::format_to_n(out.begin(), 10, std::locale(), fmt, args...); using diff_type = decltype(result.size); diff_type formatted_size = std::formatted_size(std::locale(), fmt, args...); diff_type size = std::min(10, formatted_size); @@ -88,8 +82,7 @@ "If the difference type isn't negative the test will fail " "due to using a large positive value."); CharT buffer[1] = {CharT(0)}; - std::format_to_n_result result = - std::format_to_n(buffer, -1, std::locale(), fmt, args...); + std::format_to_n_result result = std::format_to_n(buffer, -1, std::locale(), fmt, args...); using diff_type = decltype(result.size); diff_type formatted_size = std::formatted_size(std::locale(), fmt, args...); @@ -99,8 +92,8 @@ } }; -auto test_exception = []( - std::string_view what, std::basic_string fmt, const Args&... args) { +auto test_exception = + [](std::string_view what, std::basic_string fmt, const Args&... args) { #ifndef TEST_HAS_NO_EXCEPTIONS try { std::basic_string out; diff --git a/libcxx/test/std/utilities/format/format.functions/format_to_n.pass.cpp b/libcxx/test/std/utilities/format/format.functions/format_to_n.pass.cpp --- a/libcxx/test/std/utilities/format/format.functions/format_to_n.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/format_to_n.pass.cpp @@ -29,13 +29,11 @@ #include "test_macros.h" #include "format_tests.h" -auto test = [](std::basic_string expected, - std::basic_string fmt, +auto test = [](std::basic_string expected, std::basic_string fmt, const Args&... 
args) { { std::list out; - std::format_to_n_result result = - std::format_to_n(std::back_inserter(out), 0, fmt, args...); + std::format_to_n_result result = std::format_to_n(std::back_inserter(out), 0, fmt, args...); // To avoid signedness warnings make sure formatted_size uses the same type // as result.size. using diff_type = decltype(result.size); @@ -46,20 +44,17 @@ } { std::vector out; - std::format_to_n_result result = - std::format_to_n(std::back_inserter(out), 5, fmt, args...); + std::format_to_n_result result = std::format_to_n(std::back_inserter(out), 5, fmt, args...); using diff_type = decltype(result.size); diff_type formatted_size = std::formatted_size(fmt, args...); diff_type size = std::min(5, formatted_size); assert(result.size == formatted_size); - assert(std::equal(out.begin(), out.end(), expected.begin(), - expected.begin() + size)); + assert(std::equal(out.begin(), out.end(), expected.begin(), expected.begin() + size)); } { std::basic_string out; - std::format_to_n_result result = - std::format_to_n(std::back_inserter(out), 1000, fmt, args...); + std::format_to_n_result result = std::format_to_n(std::back_inserter(out), 1000, fmt, args...); using diff_type = decltype(result.size); diff_type formatted_size = std::formatted_size(fmt, args...); diff_type size = std::min(1000, formatted_size); @@ -70,8 +65,7 @@ { // Test the returned iterator. std::basic_string out(10, CharT(' ')); - std::format_to_n_result result = - std::format_to_n(out.begin(), 10, fmt, args...); + std::format_to_n_result result = std::format_to_n(out.begin(), 10, fmt, args...); using diff_type = decltype(result.size); diff_type formatted_size = std::formatted_size(fmt, args...); diff_type size = std::min(10, formatted_size); @@ -95,8 +89,8 @@ } }; -auto test_exception = []( - std::string_view what, std::basic_string fmt, const Args&... args) { +auto test_exception = + [](std::string_view what, std::basic_string fmt, const Args&... 
args) { #ifndef TEST_HAS_NO_EXCEPTIONS try { std::basic_string out; diff --git a/libcxx/test/std/utilities/format/format.functions/formatted_size.locale.pass.cpp b/libcxx/test/std/utilities/format/format.functions/formatted_size.locale.pass.cpp --- a/libcxx/test/std/utilities/format/format.functions/formatted_size.locale.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/formatted_size.locale.pass.cpp @@ -28,15 +28,14 @@ #include "test_macros.h" #include "format_tests.h" -auto test = [](std::basic_string expected, - std::basic_string fmt, +auto test = [](std::basic_string expected, std::basic_string fmt, const Args&... args) { size_t size = std::formatted_size(std::locale(), fmt, args...); assert(size == expected.size()); }; -auto test_exception = []( - std::string_view what, std::basic_string fmt, const Args&... args) { +auto test_exception = + [](std::string_view what, std::basic_string fmt, const Args&... args) { #ifndef TEST_HAS_NO_EXCEPTIONS try { std::formatted_size(std::locale(), fmt, args...); diff --git a/libcxx/test/std/utilities/format/format.functions/formatted_size.pass.cpp b/libcxx/test/std/utilities/format/format.functions/formatted_size.pass.cpp --- a/libcxx/test/std/utilities/format/format.functions/formatted_size.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/formatted_size.pass.cpp @@ -25,15 +25,14 @@ #include "test_macros.h" #include "format_tests.h" -auto test = [](std::basic_string expected, - std::basic_string fmt, +auto test = [](std::basic_string expected, std::basic_string fmt, const Args&... args) { size_t size = std::formatted_size(fmt, args...); assert(size == expected.size()); }; -auto test_exception = []( - std::string_view what, std::basic_string fmt, const Args&... args) { +auto test_exception = + [](std::string_view what, std::basic_string fmt, const Args&... 
args) { #ifndef TEST_HAS_NO_EXCEPTIONS try { std::formatted_size(fmt, args...); diff --git a/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp b/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp --- a/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp @@ -126,21 +126,19 @@ #endif template -void test(std::basic_string expected, std::basic_string fmt, - const Args&... args) { +void test(std::basic_string expected, std::basic_string fmt, const Args&... args) { // *** format *** { std::basic_string out = std::format(fmt, args...); if constexpr (std::same_as) if (out != expected) - std::cerr << "\nFormat string " << fmt << "\nExpected output " - << expected << "\nActual output " << out << '\n'; + std::cerr << "\nFormat string " << fmt << "\nExpected output " << expected << "\nActual output " << out + << '\n'; assert(out == expected); } // *** vformat *** { - std::basic_string out = - std::vformat(fmt, std::make_format_args>(args...)); + std::basic_string out = std::vformat(fmt, std::make_format_args>(args...)); assert(out == expected); } // *** format_to *** @@ -153,16 +151,14 @@ // *** vformat_to *** { std::basic_string out(expected.size(), CharT(' ')); - auto it = std::vformat_to(out.begin(), fmt, - std::make_format_args>(args...)); + auto it = std::vformat_to(out.begin(), fmt, std::make_format_args>(args...)); assert(it == out.end()); assert(out == expected); } // *** format_to_n *** { std::basic_string out; - std::format_to_n_result result = - std::format_to_n(std::back_inserter(out), 1000, fmt, args...); + std::format_to_n_result result = std::format_to_n(std::back_inserter(out), 1000, fmt, args...); using diff_type = decltype(result.size); diff_type formatted_size = std::formatted_size(fmt, args...); diff_type size = std::min(1000, formatted_size); @@ -178,21 +174,19 @@ } template -void 
test(std::basic_string expected, std::locale loc, - std::basic_string fmt, const Args&... args) { +void test(std::basic_string expected, std::locale loc, std::basic_string fmt, const Args&... args) { // *** format *** { std::basic_string out = std::format(loc, fmt, args...); if constexpr (std::same_as) if (out != expected) - std::cerr << "\nFormat string " << fmt << "\nExpected output " - << expected << "\nActual output " << out << '\n'; + std::cerr << "\nFormat string " << fmt << "\nExpected output " << expected << "\nActual output " << out + << '\n'; assert(out == expected); } // *** vformat *** { - std::basic_string out = std::vformat( - loc, fmt, std::make_format_args>(args...)); + std::basic_string out = std::vformat(loc, fmt, std::make_format_args>(args...)); assert(out == expected); } // *** format_to *** @@ -205,16 +199,14 @@ // *** vformat_to *** { std::basic_string out(expected.size(), CharT(' ')); - auto it = std::vformat_to(out.begin(), loc, fmt, - std::make_format_args>(args...)); + auto it = std::vformat_to(out.begin(), loc, fmt, std::make_format_args>(args...)); assert(it == out.end()); assert(out == expected); } // *** format_to_n *** { std::basic_string out; - std::format_to_n_result result = - std::format_to_n(std::back_inserter(out), 1000, loc, fmt, args...); + std::format_to_n_result result = std::format_to_n(std::back_inserter(out), 1000, loc, fmt, args...); using diff_type = decltype(result.size); diff_type formatted_size = std::formatted_size(loc, fmt, args...); diff_type size = std::min(1000, formatted_size); @@ -239,13 +231,13 @@ string_type do_falsename() const override { return "ungültig"; } }; -#ifndef TEST_HAS_NO_WIDE_CHARACTERS +# ifndef TEST_HAS_NO_WIDE_CHARACTERS template <> struct numpunct_unicode : std::numpunct { string_type do_truename() const override { return L"gültig"; } string_type do_falsename() const override { return L"ungültig"; } }; -#endif +# endif #endif // TEST_HAS_NO_UNICODE template @@ -268,8 +260,7 @@ 
test(STR("false"), std::locale(LOCALE_en_US_UTF_8), STR("{:L}"), false); #ifndef TEST_HAS_NO_UNICODE - std::locale loc_unicode = - std::locale(std::locale(), new numpunct_unicode()); + std::locale loc_unicode = std::locale(std::locale(), new numpunct_unicode()); test(STR("gültig"), loc_unicode, STR("{:L}"), true); test(STR("ungültig"), loc_unicode, STR("{:L}"), false); diff --git a/libcxx/test/std/utilities/format/format.functions/vformat.locale.pass.cpp b/libcxx/test/std/utilities/format/format.functions/vformat.locale.pass.cpp --- a/libcxx/test/std/utilities/format/format.functions/vformat.locale.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/vformat.locale.pass.cpp @@ -24,20 +24,17 @@ #include "test_macros.h" #include "format_tests.h" -auto test = [](std::basic_string expected, - std::basic_string fmt, +auto test = [](std::basic_string expected, std::basic_string fmt, const Args&... args) { - std::basic_string out = std::vformat( - std::locale(), fmt, std::make_format_args>(args...)); + std::basic_string out = std::vformat(std::locale(), fmt, std::make_format_args>(args...)); assert(out == expected); }; -auto test_exception = []( - std::string_view what, std::basic_string fmt, const Args&... args) { +auto test_exception = + [](std::string_view what, std::basic_string fmt, const Args&... 
args) { #ifndef TEST_HAS_NO_EXCEPTIONS try { - (void) std::vformat(std::locale(), fmt, - std::make_format_args>(args...)); + (void)std::vformat(std::locale(), fmt, std::make_format_args>(args...)); assert(false); } catch ([[maybe_unused]] std::format_error& e) { LIBCPP_ASSERT(e.what() == what); diff --git a/libcxx/test/std/utilities/format/format.functions/vformat.pass.cpp b/libcxx/test/std/utilities/format/format.functions/vformat.pass.cpp --- a/libcxx/test/std/utilities/format/format.functions/vformat.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/vformat.pass.cpp @@ -23,16 +23,14 @@ #include "test_macros.h" #include "format_tests.h" -auto test = [](std::basic_string expected, - std::basic_string fmt, +auto test = [](std::basic_string expected, std::basic_string fmt, const Args&... args) { - std::basic_string out = - std::vformat(fmt, std::make_format_args>(args...)); + std::basic_string out = std::vformat(fmt, std::make_format_args>(args...)); assert(out == expected); }; -auto test_exception = []( - std::string_view what, std::basic_string fmt, const Args&... args) { +auto test_exception = + [](std::string_view what, std::basic_string fmt, const Args&... args) { #ifndef TEST_HAS_NO_EXCEPTIONS try { TEST_IGNORE_NODISCARD std::vformat(fmt, std::make_format_args>(args...)); diff --git a/libcxx/test/std/utilities/format/format.functions/vformat_to.locale.pass.cpp b/libcxx/test/std/utilities/format/format.functions/vformat_to.locale.pass.cpp --- a/libcxx/test/std/utilities/format/format.functions/vformat_to.locale.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/vformat_to.locale.pass.cpp @@ -30,49 +30,40 @@ #include "test_macros.h" #include "format_tests.h" -auto test = [](std::basic_string expected, - std::basic_string fmt, +auto test = [](std::basic_string expected, std::basic_string fmt, const Args&... 
args) { { std::basic_string out(expected.size(), CharT(' ')); - auto it = std::vformat_to(out.begin(), std::locale(), fmt, - std::make_format_args>(args...)); + auto it = std::vformat_to(out.begin(), std::locale(), fmt, std::make_format_args>(args...)); assert(it == out.end()); assert(out == expected); } { std::list out; - std::vformat_to(std::back_inserter(out), std::locale(), fmt, - std::make_format_args>(args...)); - assert( - std::equal(out.begin(), out.end(), expected.begin(), expected.end())); + std::vformat_to(std::back_inserter(out), std::locale(), fmt, std::make_format_args>(args...)); + assert(std::equal(out.begin(), out.end(), expected.begin(), expected.end())); } { std::vector out; - std::vformat_to(std::back_inserter(out), std::locale(), fmt, - std::make_format_args>(args...)); - assert( - std::equal(out.begin(), out.end(), expected.begin(), expected.end())); + std::vformat_to(std::back_inserter(out), std::locale(), fmt, std::make_format_args>(args...)); + assert(std::equal(out.begin(), out.end(), expected.begin(), expected.end())); } { assert(expected.size() < 4096 && "Update the size of the buffer."); CharT out[4096]; - CharT* it = - std::vformat_to(out, std::locale(), fmt, - std::make_format_args>(args...)); + CharT* it = std::vformat_to(out, std::locale(), fmt, std::make_format_args>(args...)); assert(std::distance(out, it) == int(expected.size())); // Convert to std::string since output contains '\0' for boolean tests. assert(std::basic_string(out, it) == expected); } }; -auto test_exception = []( - std::string_view what, std::basic_string fmt, const Args&... args) { +auto test_exception = + [](std::string_view what, std::basic_string fmt, const Args&... 
args) { #ifndef TEST_HAS_NO_EXCEPTIONS try { std::basic_string out; - std::vformat_to(std::back_inserter(out), std::locale(), fmt, - std::make_format_args>(args...)); + std::vformat_to(std::back_inserter(out), std::locale(), fmt, std::make_format_args>(args...)); assert(false); } catch ([[maybe_unused]] std::format_error& e) { LIBCPP_ASSERT(e.what() == what); diff --git a/libcxx/test/std/utilities/format/format.functions/vformat_to.pass.cpp b/libcxx/test/std/utilities/format/format.functions/vformat_to.pass.cpp --- a/libcxx/test/std/utilities/format/format.functions/vformat_to.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/vformat_to.pass.cpp @@ -31,48 +31,40 @@ #include "test_macros.h" #include "format_tests.h" -auto test = [](std::basic_string expected, - std::basic_string fmt, +auto test = [](std::basic_string expected, std::basic_string fmt, const Args&... args) { { std::basic_string out(expected.size(), CharT(' ')); - auto it = std::vformat_to(out.begin(), fmt, - std::make_format_args>(args...)); + auto it = std::vformat_to(out.begin(), fmt, std::make_format_args>(args...)); assert(it == out.end()); assert(out == expected); } { std::list out; - std::vformat_to(std::back_inserter(out), fmt, - std::make_format_args>(args...)); - assert( - std::equal(out.begin(), out.end(), expected.begin(), expected.end())); + std::vformat_to(std::back_inserter(out), fmt, std::make_format_args>(args...)); + assert(std::equal(out.begin(), out.end(), expected.begin(), expected.end())); } { std::vector out; - std::vformat_to(std::back_inserter(out), fmt, - std::make_format_args>(args...)); - assert( - std::equal(out.begin(), out.end(), expected.begin(), expected.end())); + std::vformat_to(std::back_inserter(out), fmt, std::make_format_args>(args...)); + assert(std::equal(out.begin(), out.end(), expected.begin(), expected.end())); } { assert(expected.size() < 4096 && "Update the size of the buffer."); CharT out[4096]; - CharT* it = std::vformat_to( - out, fmt, 
std::make_format_args>(args...)); + CharT* it = std::vformat_to(out, fmt, std::make_format_args>(args...)); assert(std::distance(out, it) == int(expected.size())); // Convert to std::string since output contains '\0' for boolean tests. assert(std::basic_string(out, it) == expected); } }; -auto test_exception = []( - std::string_view what, std::basic_string fmt, const Args&... args) { +auto test_exception = + [](std::string_view what, std::basic_string fmt, const Args&... args) { #ifndef TEST_HAS_NO_EXCEPTIONS try { std::basic_string out; - std::vformat_to(std::back_inserter(out), fmt, - std::make_format_args>(args...)); + std::vformat_to(std::back_inserter(out), fmt, std::make_format_args>(args...)); assert(false); } catch ([[maybe_unused]] std::format_error& e) { LIBCPP_ASSERT(e.what() == what); diff --git a/lld/ELF/CMakeLists.txt b/lld/ELF/CMakeLists.txt --- a/lld/ELF/CMakeLists.txt +++ b/lld/ELF/CMakeLists.txt @@ -2,6 +2,10 @@ tablegen(LLVM Options.inc -gen-opt-parser-defs) add_public_tablegen_target(ELFOptionsTableGen) +if(LLVM_ENABLE_ZLIB) + set(imported_libs ZLIB::ZLIB) +endif() + add_lld_library(lldELF AArch64ErrataFix.cpp Arch/AArch64.cpp @@ -58,6 +62,7 @@ LINK_LIBS lldCommon + ${imported_libs} ${LLVM_PTHREAD_LIB} DEPENDS diff --git a/lld/ELF/OutputSections.h b/lld/ELF/OutputSections.h --- a/lld/ELF/OutputSections.h +++ b/lld/ELF/OutputSections.h @@ -25,6 +25,12 @@ class InputSection; class InputSectionBase; +struct CompressedData { + std::unique_ptr[]> shards; + uint32_t numShards = 0; + uint32_t checksum = 0; +}; + // This represents a section in an output file. // It is composed of multiple InputSections. // The writer creates multiple OutputSections and assign them unique, @@ -113,7 +119,7 @@ private: // Used for implementation of --compress-debug-sections option. 
SmallVector zDebugHeader; - SmallVector compressedData; + CompressedData compressed; std::array getFiller(); }; diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp --- a/lld/ELF/OutputSections.cpp +++ b/lld/ELF/OutputSections.cpp @@ -15,7 +15,7 @@ #include "lld/Common/Memory.h" #include "lld/Common/Strings.h" #include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/Support/Compression.h" +#include "llvm/Config/config.h" // LLVM_ENABLE_ZLIB #include "llvm/Support/MD5.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Parallel.h" @@ -23,6 +23,9 @@ #include "llvm/Support/TimeProfiler.h" #include #include +#if LLVM_ENABLE_ZLIB +#include +#endif using namespace llvm; using namespace llvm::dwarf; @@ -284,13 +287,45 @@ memcpy(buf + i, filler.data(), size - i); } +#if LLVM_ENABLE_ZLIB +static SmallVector deflateShard(ArrayRef in, int level, + int flush) { + // 15 and 8 are default. windowBits=-15 is negative to generate raw deflate + // data with no zlib header or trailer. + z_stream s = {}; + deflateInit2(&s, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY); + s.next_in = const_cast(in.data()); + s.avail_in = in.size(); + + // Allocate a buffer of half of the input size, and grow it by 1.5x if + // insufficient. + SmallVector out; + size_t pos = 0; + out.resize_for_overwrite(std::max(in.size() / 2, 64)); + do { + if (pos == out.size()) + out.resize_for_overwrite(out.size() * 3 / 2); + s.next_out = out.data() + pos; + s.avail_out = out.size() - pos; + (void)deflate(&s, flush); + pos = s.next_out - out.data(); + } while (s.avail_out == 0); + assert(s.avail_in == 0); + + out.truncate(pos); + deflateEnd(&s); + return out; +} +#endif + // Compress section contents if this section contains debug info. template void OutputSection::maybeCompress() { +#if LLVM_ENABLE_ZLIB using Elf_Chdr = typename ELFT::Chdr; // Compress only DWARF debug sections. 
if (!config->compressDebugSections || (flags & SHF_ALLOC) || - !name.startswith(".debug_")) + !name.startswith(".debug_") || size == 0) return; llvm::TimeTraceScope timeScope("Compress debug sections"); @@ -309,13 +344,42 @@ // -O2 is given, we use level 6 to compress debug info more by ~15%. We found // that level 7 to 9 doesn't make much difference (~1% more compression) while // they take significant amount of time (~2x), so level 6 seems enough. - if (Error e = zlib::compress(toStringRef(buf), compressedData, - config->optimize >= 2 ? 6 : 1)) - fatal("compress failed: " + llvm::toString(std::move(e))); + const int level = config->optimize >= 2 ? 6 : Z_BEST_SPEED; + + // Split input into 1-MiB shards. + constexpr size_t shardSize = 1 << 20; + const size_t numShards = (size + shardSize - 1) / shardSize; + auto shardsIn = std::make_unique[]>(numShards); + for (size_t i = 0, start = 0, end; start != buf.size(); ++i, start = end) { + end = std::min(start + shardSize, buf.size()); + shardsIn[i] = makeArrayRef(buf.data() + start, end - start); + } + + // Compress shards and compute Alder-32 checksums. Use Z_SYNC_FLUSH for all + // shards but the last to flush the output to a byte boundary to be + // concatenated with the next shard. + auto shardsOut = std::make_unique[]>(numShards); + auto shardsAdler = std::make_unique(numShards); + parallelForEachN(0, numShards, [&](size_t i) { + shardsOut[i] = deflateShard(shardsIn[i], level, + i != numShards - 1 ? Z_SYNC_FLUSH : Z_FINISH); + shardsAdler[i] = adler32(1, shardsIn[i].data(), shardsIn[i].size()); + }); + + // Update section size and combine Alder-32 checksums. + uint32_t checksum = 1; // Initial Adler-32 value + size = sizeof(Elf_Chdr) + 2; // Elf_Chdir and zlib header + for (size_t i = 0; i != numShards; ++i) { + size += shardsOut[i].size(); + checksum = adler32_combine(checksum, shardsAdler[i], shardsIn[i].size()); + } + size += 4; // checksum - // Update section headers. 
- size = sizeof(Elf_Chdr) + compressedData.size(); + compressed.shards = std::move(shardsOut); + compressed.numShards = numShards; + compressed.checksum = checksum; flags |= SHF_COMPRESSED; +#endif } static void writeInt(uint8_t *buf, uint64_t data, uint64_t size) { @@ -339,10 +403,25 @@ // If --compress-debug-section is specified and if this is a debug section, // we've already compressed section contents. If that's the case, // just write it down. - if (!compressedData.empty()) { + if (compressed.shards) { memcpy(buf, zDebugHeader.data(), zDebugHeader.size()); - memcpy(buf + zDebugHeader.size(), compressedData.data(), - compressedData.size()); + buf += zDebugHeader.size(); + size -= zDebugHeader.size(); + + // Compute shard offsets. + auto offsets = std::make_unique(compressed.numShards); + offsets[0] = 2; // zlib header + for (size_t i = 1; i != compressed.numShards; ++i) + offsets[i] = offsets[i - 1] + compressed.shards[i - 1].size(); + + buf[0] = 0x78; // CMF + buf[1] = 0x01; // FLG: best speed + parallelForEachN(0, compressed.numShards, [&](size_t i) { + memcpy(buf + offsets[i], compressed.shards[i].data(), + compressed.shards[i].size()); + }); + + write32be(buf + size - 4, compressed.checksum); return; } diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/StringRef.h" #include "Plugins/Process/Utility/RegisterContextDarwin_arm.h" @@ -55,6 +56,9 @@ #include // GetLLDBSharedCacheUUID() needs to call dlsym() #include +#include +#include +#include #endif #ifndef __APPLE__ @@ -155,28 +159,6 @@ // and later }; -struct lldb_copy_dyld_cache_mapping_info { - uint64_t address; - uint64_t size; - uint64_t fileOffset; 
- uint32_t maxProt; - uint32_t initProt; -}; - -struct lldb_copy_dyld_cache_local_symbols_info { - uint32_t nlistOffset; - uint32_t nlistCount; - uint32_t stringsOffset; - uint32_t stringsSize; - uint32_t entriesOffset; - uint32_t entriesCount; -}; -struct lldb_copy_dyld_cache_local_symbols_entry { - uint32_t dylibOffset; - uint32_t nlistStartIndex; - uint32_t nlistCount; -}; - static void PrintRegisterValue(RegisterContext *reg_ctx, const char *name, const char *alt_name, size_t reg_byte_size, Stream &data) { @@ -2257,6 +2239,7 @@ llvm::StringRef g_objc_v2_prefix_class("_OBJC_CLASS_$_"); llvm::StringRef g_objc_v2_prefix_metaclass("_OBJC_METACLASS_$_"); llvm::StringRef g_objc_v2_prefix_ivar("_OBJC_IVAR_$_"); + UUID image_uuid; for (i = 0; i < m_header.ncmds; ++i) { const lldb::offset_t cmd_offset = offset; @@ -2324,6 +2307,14 @@ sizeof(function_starts_load_command)); break; + case LC_UUID: { + const uint8_t *uuid_bytes = m_data.PeekData(offset, 16); + + if (uuid_bytes) + image_uuid = UUID::fromOptionalData(uuid_bytes, 16); + break; + } + default: break; } @@ -2615,8 +2606,6 @@ ? eh_frame_section_sp->GetID() : static_cast(NO_SECT); - lldb::offset_t nlist_data_offset = 0; - uint32_t N_SO_index = UINT32_MAX; MachSymtabSectionInfo section_info(section_list); @@ -2682,26 +2671,6 @@ // Next we need to determine the correct path for the dyld shared cache. 
ArchSpec header_arch = GetArchitecture(); - char dsc_path[PATH_MAX]; - char dsc_path_development[PATH_MAX]; - - snprintf( - dsc_path, sizeof(dsc_path), "%s%s%s", - "/System/Library/Caches/com.apple.dyld/", /* IPHONE_DYLD_SHARED_CACHE_DIR - */ - "dyld_shared_cache_", /* DYLD_SHARED_CACHE_BASE_NAME */ - header_arch.GetArchitectureName()); - - snprintf( - dsc_path_development, sizeof(dsc_path), "%s%s%s%s", - "/System/Library/Caches/com.apple.dyld/", /* IPHONE_DYLD_SHARED_CACHE_DIR - */ - "dyld_shared_cache_", /* DYLD_SHARED_CACHE_BASE_NAME */ - header_arch.GetArchitectureName(), ".development"); - - FileSpec dsc_nondevelopment_filespec(dsc_path); - FileSpec dsc_development_filespec(dsc_path_development); - FileSpec dsc_filespec; UUID dsc_uuid; UUID process_shared_cache_uuid; @@ -2712,155 +2681,99 @@ process_shared_cache_uuid); } - // First see if we can find an exact match for the inferior process - // shared cache UUID in the development or non-development shared caches - // on disk. - if (process_shared_cache_uuid.IsValid()) { - if (FileSystem::Instance().Exists(dsc_development_filespec)) { - UUID dsc_development_uuid = GetSharedCacheUUID( - dsc_development_filespec, byte_order, addr_byte_size); - if (dsc_development_uuid.IsValid() && - dsc_development_uuid == process_shared_cache_uuid) { - dsc_filespec = dsc_development_filespec; - dsc_uuid = dsc_development_uuid; - } - } - if (!dsc_uuid.IsValid() && - FileSystem::Instance().Exists(dsc_nondevelopment_filespec)) { - UUID dsc_nondevelopment_uuid = GetSharedCacheUUID( - dsc_nondevelopment_filespec, byte_order, addr_byte_size); - if (dsc_nondevelopment_uuid.IsValid() && - dsc_nondevelopment_uuid == process_shared_cache_uuid) { - dsc_filespec = dsc_nondevelopment_filespec; - dsc_uuid = dsc_nondevelopment_uuid; - } - } - } + __block bool found_image = false; + __block void *nlist_buffer = nullptr; + __block unsigned nlist_count = 0; + __block char *string_table = nullptr; + __block vm_offset_t vm_nlist_memory = 0; + 
__block mach_msg_type_number_t vm_nlist_bytes_read = 0; + __block vm_offset_t vm_string_memory = 0; + __block mach_msg_type_number_t vm_string_bytes_read = 0; + + auto _ = llvm::make_scope_exit(^{ + if (vm_nlist_memory) + vm_deallocate(mach_task_self(), vm_nlist_memory, vm_nlist_bytes_read); + if (vm_string_memory) + vm_deallocate(mach_task_self(), vm_string_memory, vm_string_bytes_read); + }); - // Failing a UUID match, prefer the development dyld_shared cache if both - // are present. - if (!FileSystem::Instance().Exists(dsc_filespec)) { - if (FileSystem::Instance().Exists(dsc_development_filespec)) { - dsc_filespec = dsc_development_filespec; - } else { - dsc_filespec = dsc_nondevelopment_filespec; - } - } + typedef llvm::DenseMap UndefinedNameToDescMap; + typedef llvm::DenseMap SymbolIndexToName; + UndefinedNameToDescMap undefined_name_to_desc; + SymbolIndexToName reexport_shlib_needs_fixup; - /* The dyld_cache_header has a pointer to the - dyld_cache_local_symbols_info structure (localSymbolsOffset). - The dyld_cache_local_symbols_info structure gives us three things: - 1. The start and count of the nlist records in the dyld_shared_cache - file - 2. The start and size of the strings for these nlist records - 3. The start and count of dyld_cache_local_symbols_entry entries - - There is one dyld_cache_local_symbols_entry per dylib/framework in the - dyld shared cache. - The "dylibOffset" field is the Mach-O header of this dylib/framework in - the dyld shared cache. - The dyld_cache_local_symbols_entry also lists the start of this - dylib/framework's nlist records - and the count of how many nlist records there are for this - dylib/framework. 
- */ - - // Process the dyld shared cache header to find the unmapped symbols - - DataBufferSP dsc_data_sp = MapFileData( - dsc_filespec, sizeof(struct lldb_copy_dyld_cache_header_v1), 0); - if (!dsc_uuid.IsValid()) { - dsc_uuid = GetSharedCacheUUID(dsc_filespec, byte_order, addr_byte_size); - } - if (dsc_data_sp) { - DataExtractor dsc_header_data(dsc_data_sp, byte_order, addr_byte_size); + dyld_for_each_installed_shared_cache(^(dyld_shared_cache_t shared_cache) { + uuid_t cache_uuid; + dyld_shared_cache_copy_uuid(shared_cache, &cache_uuid); + if (found_image) + return; - bool uuid_match = true; - if (dsc_uuid.IsValid() && process) { if (process_shared_cache_uuid.IsValid() && - dsc_uuid != process_shared_cache_uuid) { - // The on-disk dyld_shared_cache file is not the same as the one in - // this process' memory, don't use it. - uuid_match = false; - ModuleSP module_sp(GetModule()); - if (module_sp) - module_sp->ReportWarning("process shared cache does not match " - "on-disk dyld_shared_cache file, some " - "symbol names will be missing."); - } - } + process_shared_cache_uuid != UUID::fromOptionalData(&cache_uuid, 16)) + return; - offset = offsetof(struct lldb_copy_dyld_cache_header_v1, mappingOffset); - - uint32_t mappingOffset = dsc_header_data.GetU32(&offset); - - // If the mappingOffset points to a location inside the header, we've - // opened an old dyld shared cache, and should not proceed further. 
- if (uuid_match && - mappingOffset >= sizeof(struct lldb_copy_dyld_cache_header_v1)) { - - DataBufferSP dsc_mapping_info_data_sp = MapFileData( - dsc_filespec, sizeof(struct lldb_copy_dyld_cache_mapping_info), - mappingOffset); - - DataExtractor dsc_mapping_info_data(dsc_mapping_info_data_sp, - byte_order, addr_byte_size); - offset = 0; - - // The File addresses (from the in-memory Mach-O load commands) for - // the shared libraries in the shared library cache need to be - // adjusted by an offset to match up with the dylibOffset identifying - // field in the dyld_cache_local_symbol_entry's. This offset is - // recorded in mapping_offset_value. - const uint64_t mapping_offset_value = - dsc_mapping_info_data.GetU64(&offset); - - offset = - offsetof(struct lldb_copy_dyld_cache_header_v1, localSymbolsOffset); - uint64_t localSymbolsOffset = dsc_header_data.GetU64(&offset); - uint64_t localSymbolsSize = dsc_header_data.GetU64(&offset); - - if (localSymbolsOffset && localSymbolsSize) { - // Map the local symbols - DataBufferSP dsc_local_symbols_data_sp = - MapFileData(dsc_filespec, localSymbolsSize, localSymbolsOffset); - - if (dsc_local_symbols_data_sp) { - DataExtractor dsc_local_symbols_data(dsc_local_symbols_data_sp, - byte_order, addr_byte_size); - - offset = 0; - - typedef llvm::DenseMap UndefinedNameToDescMap; - typedef llvm::DenseMap SymbolIndexToName; - UndefinedNameToDescMap undefined_name_to_desc; - SymbolIndexToName reexport_shlib_needs_fixup; - - // Read the local_symbols_infos struct in one shot - struct lldb_copy_dyld_cache_local_symbols_info local_symbols_info; - dsc_local_symbols_data.GetU32(&offset, - &local_symbols_info.nlistOffset, 6); - - SectionSP text_section_sp( - section_list->FindSectionByName(GetSegmentNameTEXT())); - - uint32_t header_file_offset = - (text_section_sp->GetFileAddress() - mapping_offset_value); - - offset = local_symbols_info.entriesOffset; - for (uint32_t entry_index = 0; - entry_index < local_symbols_info.entriesCount; 
entry_index++) { - struct lldb_copy_dyld_cache_local_symbols_entry - local_symbols_entry; - local_symbols_entry.dylibOffset = - dsc_local_symbols_data.GetU32(&offset); - local_symbols_entry.nlistStartIndex = - dsc_local_symbols_data.GetU32(&offset); - local_symbols_entry.nlistCount = - dsc_local_symbols_data.GetU32(&offset); - - if (header_file_offset == local_symbols_entry.dylibOffset) { - unmapped_local_symbols_found = local_symbols_entry.nlistCount; + dyld_shared_cache_for_each_image(shared_cache, ^(dyld_image_t image) { + uuid_t dsc_image_uuid; + if (found_image) + return; + + dyld_image_copy_uuid(image, &dsc_image_uuid); + if (image_uuid != UUID::fromOptionalData(dsc_image_uuid, 16)) + return; + + found_image = true; + + // Compute the size of the string table. We need to ask dyld for a + // new SPI to avoid this step. + dyld_image_local_nlist_content_4Symbolication( + image, ^(const void *nlistStart, uint64_t nlistCount, + const char *stringTable) { + if (!nlistStart || !nlistCount) + return; + + // The buffers passed here are valid only inside the block. + // Use vm_read to make a cheap copy of them available for our + // processing later. + kern_return_t ret = + vm_read(mach_task_self(), (vm_address_t)nlistStart, + nlist_byte_size * nlistCount, &vm_nlist_memory, + &vm_nlist_bytes_read); + if (ret != KERN_SUCCESS) + return; + assert(vm_nlist_bytes_read == nlist_byte_size * nlistCount); + + // We don't know the size of the string table. It's cheaper + // to map the whole VM region than to determine the size by + // parsing all the nlist entries.
+ vm_address_t string_address = (vm_address_t)stringTable; + vm_size_t region_size; + mach_msg_type_number_t info_count = VM_REGION_BASIC_INFO_COUNT_64; + vm_region_basic_info_data_t info; + memory_object_name_t object; + ret = vm_region_64(mach_task_self(), &string_address, + &region_size, VM_REGION_BASIC_INFO_64, + (vm_region_info_t)&info, &info_count, &object); + if (ret != KERN_SUCCESS) + return; + + ret = vm_read(mach_task_self(), (vm_address_t)stringTable, + region_size - + ((vm_address_t)stringTable - string_address), + &vm_string_memory, &vm_string_bytes_read); + if (ret != KERN_SUCCESS) + return; + + nlist_buffer = (void *)vm_nlist_memory; + string_table = (char *)vm_string_memory; + nlist_count = nlistCount; + }); + }); + }); + if (nlist_buffer) { + DataExtractor dsc_local_symbols_data(nlist_buffer, + nlist_count * nlist_byte_size, + byte_order, addr_byte_size); + unmapped_local_symbols_found = nlist_count; // The normal nlist code cannot correctly size the Symbols // array, we need to allocate it here.
@@ -2869,13 +2782,10 @@ unmapped_local_symbols_found - m_dysymtab.nlocalsym); num_syms = symtab.GetNumSymbols(); - nlist_data_offset = - local_symbols_info.nlistOffset + - (nlist_byte_size * local_symbols_entry.nlistStartIndex); - uint32_t string_table_offset = local_symbols_info.stringsOffset; + lldb::offset_t nlist_data_offset = 0; for (uint32_t nlist_index = 0; - nlist_index < local_symbols_entry.nlistCount; + nlist_index < nlist_count; nlist_index++) { ///////////////////////////// { @@ -2887,8 +2797,7 @@ struct nlist_64 nlist = *nlist_maybe; SymbolType type = eSymbolTypeInvalid; - const char *symbol_name = dsc_local_symbols_data.PeekCStr( - string_table_offset + nlist.n_strx); + const char *symbol_name = string_table + nlist.n_strx; if (symbol_name == NULL) { // No symbol should be NULL, even the symbols with no @@ -2898,7 +2807,7 @@ Host::eSystemLogError, "error: DSC unmapped local symbol[%u] has invalid " "string table offset 0x%x in %s, ignoring symbol\n", - entry_index, nlist.n_strx, + nlist_index, nlist.n_strx, module_sp->GetFileSpec().GetPath().c_str()); continue; } @@ -3759,8 +3668,6 @@ } ///////////////////////////// } - break; // No more entries to consider - } } for (const auto &pos : reexport_shlib_needs_fixup) { @@ -3774,14 +3681,9 @@ } } } - } - } - } - } - // Must reset this in case it was mutated above! 
- nlist_data_offset = 0; #endif + lldb::offset_t nlist_data_offset = 0; if (nlist_data.GetByteSize() > 0) { diff --git a/llvm/include/llvm/Support/Compiler.h b/llvm/include/llvm/Support/Compiler.h --- a/llvm/include/llvm/Support/Compiler.h +++ b/llvm/include/llvm/Support/Compiler.h @@ -17,9 +17,6 @@ #include "llvm/Config/llvm-config.h" -#ifdef __cplusplus -#include -#endif #include #if defined(_MSC_VER) diff --git a/llvm/include/llvm/Transforms/IPO/IROutliner.h b/llvm/include/llvm/Transforms/IPO/IROutliner.h --- a/llvm/include/llvm/Transforms/IPO/IROutliner.h +++ b/llvm/include/llvm/Transforms/IPO/IROutliner.h @@ -95,6 +95,10 @@ /// required for the following basic blocks in this case. bool EndsInBranch = false; + /// The PHIBlocks with their corresponding return block based on the return + /// value as the key. + DenseMap PHIBlocks; + /// Mapping of the argument number in the deduplicated function /// to a given constant, which is used when creating the arguments to the call /// to the newly created deduplicated function. This is handled separately @@ -182,7 +186,14 @@ IROutliner(function_ref GTTI, function_ref GIRSI, function_ref GORE) - : getTTI(GTTI), getIRSI(GIRSI), getORE(GORE) {} + : getTTI(GTTI), getIRSI(GIRSI), getORE(GORE) { + + // Check that the DenseMap implementation has not changed. + assert(DenseMapInfo::getEmptyKey() == (unsigned)-1 && + "DenseMapInfo's empty key isn't -1!"); + assert(DenseMapInfo::getTombstoneKey() == (unsigned)-2 && + "DenseMapInfo's tombstone key isn't -2!"); + } bool run(Module &M); private: @@ -356,6 +367,11 @@ Function *F = CI.getCalledFunction(); if (!F || CI.isIndirectCall() || !F->hasName()) return false; + // Returning twice can cause issues with the state of the function call + // that were not expected when the function was used, so we do not include + // the call in outlined functions. + if (CI.canReturnTwice()) + return false; return true; } // TODO: Handle FreezeInsts. 
Since a frozen value could be frozen inside diff --git a/llvm/lib/ProfileData/InstrProfCorrelator.cpp b/llvm/lib/ProfileData/InstrProfCorrelator.cpp --- a/llvm/lib/ProfileData/InstrProfCorrelator.cpp +++ b/llvm/lib/ProfileData/InstrProfCorrelator.cpp @@ -167,13 +167,19 @@ return {}; } auto &DU = *Die.getDwarfUnit(); + auto AddressSize = DU.getAddressByteSize(); for (auto &Location : *Locations) { - auto AddressSize = DU.getAddressByteSize(); DataExtractor Data(Location.Expr, DICtx->isLittleEndian(), AddressSize); DWARFExpression Expr(Data, AddressSize); - for (auto &Op : Expr) - if (Op.getCode() == dwarf::DW_OP_addr) + for (auto &Op : Expr) { + if (Op.getCode() == dwarf::DW_OP_addr) { return Op.getRawOperand(0); + } else if (Op.getCode() == dwarf::DW_OP_addrx) { + uint64_t Index = Op.getRawOperand(0); + if (auto SA = DU.getAddrOffsetSectionItem(Index)) + return SA->Address; + } + } } return {}; } diff --git a/llvm/lib/Support/MemAlloc.cpp b/llvm/lib/Support/MemAlloc.cpp --- a/llvm/lib/Support/MemAlloc.cpp +++ b/llvm/lib/Support/MemAlloc.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/MemAlloc.h" +#include // These are out of line to have __cpp_aligned_new not affect ABI. diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -614,6 +614,12 @@ setOperationAction(ISD::SMUL_LOHI, VT, Expand); setOperationAction(ISD::UMUL_LOHI, VT, Expand); + // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*. 
+ if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV()) { + setOperationAction(ISD::MULHU, VT, Expand); + setOperationAction(ISD::MULHS, VT, Expand); + } + setOperationAction(ISD::SMIN, VT, Legal); setOperationAction(ISD::SMAX, VT, Legal); setOperationAction(ISD::UMIN, VT, Legal); @@ -910,8 +916,11 @@ setOperationAction(ISD::UMAX, VT, Custom); setOperationAction(ISD::ABS, VT, Custom); - setOperationAction(ISD::MULHS, VT, Custom); - setOperationAction(ISD::MULHU, VT, Custom); + // vXi64 MULHS/MULHU requires the V extension instead of Zve64*. + if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV()) { + setOperationAction(ISD::MULHS, VT, Custom); + setOperationAction(ISD::MULHU, VT, Custom); + } setOperationAction(ISD::SADDSAT, VT, Custom); setOperationAction(ISD::UADDSAT, VT, Custom); diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -156,6 +156,7 @@ bool hasStdExtF() const { return HasStdExtF; } bool hasStdExtD() const { return HasStdExtD; } bool hasStdExtC() const { return HasStdExtC; } + bool hasStdExtV() const { return HasStdExtV; } bool hasStdExtZba() const { return HasStdExtZba; } bool hasStdExtZbb() const { return HasStdExtZbb; } bool hasStdExtZbc() const { return HasStdExtZbc; } diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp --- a/llvm/lib/Transforms/IPO/IROutliner.cpp +++ b/llvm/lib/Transforms/IPO/IROutliner.cpp @@ -106,6 +106,16 @@ /// of the region. unsigned BranchesToOutside = 0; + /// Tracker counting backwards from the highest unsigned value possible to + /// avoid conflicting with the GVNs of assigned values. We start at -3 since + /// -2 and -1 are assigned by the DenseMap. 
+ unsigned PHINodeGVNTracker = -3; + + DenseMap, SmallVector>> + PHINodeGVNToGVNs; + DenseMap GVNsToPHINodeGVN; + /// The number of instructions that will be outlined by extracting \ref /// Regions. InstructionCost Benefit = 0; @@ -356,6 +366,24 @@ return Benefit; } +/// Check the \p OutputMappings structure for value \p Input, if it exists +/// it has been used as an output for outlining, and has been renamed, and we +/// return the new value, otherwise, we return the same value. +/// +/// \param OutputMappings [in] - The mapping of values to their renamed value +/// after being used as an output for an outlined region. +/// \param Input [in] - The value to find the remapped value of, if it exists. +/// \return The remapped value if it has been renamed, and the same value if has +/// not. +static Value *findOutputMapping(const DenseMap OutputMappings, + Value *Input) { + DenseMap::const_iterator OutputMapping = + OutputMappings.find(Input); + if (OutputMapping != OutputMappings.end()) + return OutputMapping->second; + return Input; +} + /// Find whether \p Region matches the global value numbering to Constant /// mapping found so far. /// @@ -832,6 +860,209 @@ Region.NumExtractedInputs = OriginalIndex; } +/// Check if the \p V has any uses outside of the region other than \p PN. +/// +/// \param V [in] - The value to check. +/// \param PHILoc [in] - The location in the PHINode of \p V. +/// \param PN [in] - The PHINode using \p V. +/// \param Exits [in] - The potential blocks we exit to from the outlined +/// region. +/// \param BlocksInRegion [in] - The basic blocks contained in the region. +/// \returns true if \p V has any uses outside its region other than \p PN. +static bool outputHasNonPHI(Value *V, unsigned PHILoc, PHINode &PN, + SmallPtrSet &Exits, + DenseSet &BlocksInRegion) { + // We check to see if the value is used by the PHINode from some other + // predecessor not included in the region. If it is, we make sure + // to keep it as an output.
+ SmallVector IncomingNumbers(PN.getNumIncomingValues()); + std::iota(IncomingNumbers.begin(), IncomingNumbers.end(), 0); + if (any_of(IncomingNumbers, [PHILoc, &PN, V, &BlocksInRegion](unsigned Idx) { + return (Idx != PHILoc && V == PN.getIncomingValue(Idx) && + !BlocksInRegion.contains(PN.getIncomingBlock(Idx))); + })) + return true; + + // Check if the value is used by any other instructions outside the region. + return any_of(V->users(), [&Exits, &BlocksInRegion](User *U) { + Instruction *I = dyn_cast(U); + if (!I) + return false; + + // If the use of the item is inside the region, we skip it. Uses + // inside the region give us useful information about how the item could be + // used as an output. + BasicBlock *Parent = I->getParent(); + if (BlocksInRegion.contains(Parent)) + return false; + + // If it's not a PHINode then we definitely know the use matters. This + // output value will not be completely combined with another item in a PHINode + // as it is directly referenced by another non-phi instruction + if (!isa(I)) + return true; + + // If we have a PHINode outside one of the exit locations, then it + // can be considered an outside use as well. If there is a PHINode + // contained in the Exit where this value's use matters, it will be + // caught when we analyze that PHINode. + if (!Exits.contains(Parent)) + return true; + + return false; + }); +} + +/// Test whether \p CurrentExitFromRegion contains any PhiNodes that should be +/// considered outputs. A PHINode is an output when more than one incoming +/// value has been marked by the CodeExtractor as an output. +/// +/// \param CurrentExitFromRegion [in] - The block to analyze. +/// \param PotentialExitsFromRegion [in] - The potential exit blocks from the +/// region. +/// \param RegionBlocks [in] - The basic blocks in the region. +/// \param Outputs [in, out] - The existing outputs for the region, we may add +/// PHINodes to this as we find that they replace output values.
+/// \param OutputsReplacedByPHINode [out] - A set containing outputs that are +/// totally replaced by a PHINode. +/// \param OutputsWithNonPhiUses [out] - A set containing outputs that are used +/// in PHINodes, but have other uses, and should still be considered outputs. +static void analyzeExitPHIsForOutputUses( + BasicBlock *CurrentExitFromRegion, + SmallPtrSet &PotentialExitsFromRegion, + DenseSet &RegionBlocks, SetVector &Outputs, + DenseSet &OutputsReplacedByPHINode, + DenseSet &OutputsWithNonPhiUses) { + for (PHINode &PN : CurrentExitFromRegion->phis()) { + // Find all incoming values from the outlining region. + SmallVector IncomingVals; + for (unsigned I = 0, E = PN.getNumIncomingValues(); I < E; ++I) + if (RegionBlocks.contains(PN.getIncomingBlock(I))) + IncomingVals.push_back(I); + + // Do not process PHI if there are no predecessors from region. + unsigned NumIncomingVals = IncomingVals.size(); + if (NumIncomingVals == 0) + continue; + + // If there is one predecessor, we mark it as a value that needs to be kept + // as an output. + if (NumIncomingVals == 1) { + Value *V = PN.getIncomingValue(*IncomingVals.begin()); + OutputsWithNonPhiUses.insert(V); + OutputsReplacedByPHINode.erase(V); + continue; + } + + // This PHINode will be used as an output value, so we add it to our list. + Outputs.insert(&PN); + + // Not all of the incoming values should be ignored as other inputs and + // outputs may have uses in outlined region. If they have other uses + // outside of the single PHINode we should not skip over it. 
+ for (unsigned Idx : IncomingVals) { + Value *V = PN.getIncomingValue(Idx); + if (outputHasNonPHI(V, Idx, PN, PotentialExitsFromRegion, RegionBlocks)) { + OutputsWithNonPhiUses.insert(V); + OutputsReplacedByPHINode.erase(V); + continue; + } + if (!OutputsWithNonPhiUses.contains(V)) + OutputsReplacedByPHINode.insert(V); + } + } +} + +// Represents the type for the unsigned number denoting the output number for +// phi node, along with the canonical number for the exit block. +using ArgLocWithBBCanon = std::pair; +// The list of canonical numbers for the incoming values to a PHINode. +using CanonList = SmallVector; +// The pair type representing the set of canonical values being combined in the +// PHINode, along with the location data for the PHINode. +using PHINodeData = std::pair; + +/// Encode \p PND as an integer for easy lookup based on the argument location, +/// the parent BasicBlock canonical numbering, and the canonical numbering of +/// the values stored in the PHINode. +/// +/// \param PND - The data to hash. +/// \returns The hash code of \p PND. +static hash_code encodePHINodeData(PHINodeData &PND) { + return llvm::hash_combine( + llvm::hash_value(PND.first.first), llvm::hash_value(PND.first.second), + llvm::hash_combine_range(PND.second.begin(), PND.second.end())); +} + +/// Create a special GVN for PHINodes that will be used outside of +/// the region. We create a hash code based on the Canonical number of the +/// parent BasicBlock, the canonical numbering of the values stored in the +/// PHINode and the aggregate argument location. This is used to find whether +/// this PHINode type has been given a canonical numbering already. If not, we +/// assign it a value and store it for later use. The value is returned to +/// identify different output schemes for the set of regions. +/// +/// \param Region - The region that \p PN is an output for. +/// \param PN - The PHINode we are analyzing. +/// \param AggArgIdx - The argument \p PN will be stored into. 
+/// \returns An optional holding the assigned canonical number, or None if +/// there is some attribute of the PHINode blocking it from being used. +static Optional getGVNForPHINode(OutlinableRegion &Region, + PHINode *PN, unsigned AggArgIdx) { + OutlinableGroup &Group = *Region.Parent; + IRSimilarityCandidate &Cand = *Region.Candidate; + BasicBlock *PHIBB = PN->getParent(); + CanonList PHIGVNs; + for (Value *Incoming : PN->incoming_values()) { + // If we cannot find a GVN, this means that the input to the PHINode is + // not included in the region we are trying to analyze, meaning, that if + // it was outlined, we would be adding an extra input. We ignore this + // case for now, and so ignore the region. + Optional OGVN = Cand.getGVN(Incoming); + if (!OGVN.hasValue()) { + Region.IgnoreRegion = true; + return None; + } + + // Collect the canonical numbers of the values in the PHINode. + unsigned GVN = OGVN.getValue(); + OGVN = Cand.getCanonicalNum(GVN); + assert(OGVN.hasValue() && "No GVN found for incoming value?"); + PHIGVNs.push_back(*OGVN); + } + + // Now that we have the GVNs for the incoming values, we are going to combine + // them with the GVN of the incoming block, and the output location of the + // PHINode to generate a hash value representing this instance of the PHINode. + DenseMap::iterator GVNToPHIIt; + DenseMap::iterator PHIToGVNIt; + Optional BBGVN = Cand.getGVN(PHIBB); + assert(BBGVN.hasValue() && "Could not find GVN for the incoming block!"); + + BBGVN = Cand.getCanonicalNum(BBGVN.getValue()); + assert(BBGVN.hasValue() && + "Could not find canonical number for the incoming block!"); + // Create a pair of the exit block canonical value, and the aggregate + // argument location, connected to the canonical numbers stored in the + // PHINode.
+ PHINodeData TemporaryPair = + std::make_pair(std::make_pair(BBGVN.getValue(), AggArgIdx), PHIGVNs); + hash_code PHINodeDataHash = encodePHINodeData(TemporaryPair); + + // Look for and create a new entry in our connection between canonical + // numbers for PHINodes, and the set of objects we just created. + GVNToPHIIt = Group.GVNsToPHINodeGVN.find(PHINodeDataHash); + if (GVNToPHIIt == Group.GVNsToPHINodeGVN.end()) { + bool Inserted = false; + std::tie(PHIToGVNIt, Inserted) = Group.PHINodeGVNToGVNs.insert( + std::make_pair(Group.PHINodeGVNTracker, TemporaryPair)); + std::tie(GVNToPHIIt, Inserted) = Group.GVNsToPHINodeGVN.insert( + std::make_pair(PHINodeDataHash, Group.PHINodeGVNTracker--)); + } + + return GVNToPHIIt->second; +} + /// Create a mapping of the output arguments for the \p Region to the output /// arguments of the overall outlined function. /// @@ -844,35 +1075,25 @@ IRSimilarityCandidate &C = *Region.Candidate; SmallVector BE; - DenseSet BBSet; - C.getBasicBlocks(BBSet, BE); + DenseSet BlocksInRegion; + C.getBasicBlocks(BlocksInRegion, BE); // Find the exits to the region. SmallPtrSet Exits; for (BasicBlock *Block : BE) for (BasicBlock *Succ : successors(Block)) - if (!BBSet.contains(Succ)) + if (!BlocksInRegion.contains(Succ)) Exits.insert(Succ); // After determining which blocks exit to PHINodes, we add these PHINodes to // the set of outputs to be processed. We also check the incoming values of // the PHINodes for whether they should no longer be considered outputs. - for (BasicBlock *ExitBB : Exits) { - for (PHINode &PN : ExitBB->phis()) { - // Find all incoming values from the outlining region. - SmallVector IncomingVals; - for (unsigned Idx = 0; Idx < PN.getNumIncomingValues(); ++Idx) - if (BBSet.contains(PN.getIncomingBlock(Idx))) - IncomingVals.push_back(Idx); - - // Do not process PHI if there is one (or fewer) predecessor from region. 
- if (IncomingVals.size() <= 1) - continue; - - Region.IgnoreRegion = true; - return; - } - } + DenseSet OutputsReplacedByPHINode; + DenseSet OutputsWithNonPhiUses; + for (BasicBlock *ExitBB : Exits) + analyzeExitPHIsForOutputUses(ExitBB, Exits, BlocksInRegion, Outputs, + OutputsReplacedByPHINode, + OutputsWithNonPhiUses); // This counts the argument number in the extracted function. unsigned OriginalIndex = Region.NumExtractedInputs; @@ -895,9 +1116,13 @@ // do not have to be in same order, but are functionally the same, we will // have to use a different scheme, as one-to-one correspondence is not // guaranteed. - unsigned GlobalValue = C.getGVN(Output).getValue(); unsigned ArgumentSize = Group.ArgumentTypes.size(); + // If the output is combined in a PHINode, we make sure to skip over it. + if (OutputsReplacedByPHINode.contains(Output)) + continue; + + unsigned AggArgIdx = 0; for (unsigned Jdx = TypeIndex; Jdx < ArgumentSize; Jdx++) { if (Group.ArgumentTypes[Jdx] != PointerType::getUnqual(Output->getType())) continue; @@ -909,7 +1134,7 @@ AggArgsUsed.insert(Jdx); Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, Jdx)); Region.AggArgToExtracted.insert(std::make_pair(Jdx, OriginalIndex)); - Region.GVNStores.push_back(GlobalValue); + AggArgIdx = Jdx; break; } @@ -918,18 +1143,54 @@ // function to handle this output and create a mapping to it. if (!TypeFound) { Group.ArgumentTypes.push_back(PointerType::getUnqual(Output->getType())); - AggArgsUsed.insert(Group.ArgumentTypes.size() - 1); + // Mark the new pointer type as the last value in the aggregate argument + // list. 
+ unsigned ArgTypeIdx = Group.ArgumentTypes.size() - 1; + AggArgsUsed.insert(ArgTypeIdx); Region.ExtractedArgToAgg.insert( - std::make_pair(OriginalIndex, Group.ArgumentTypes.size() - 1)); + std::make_pair(OriginalIndex, ArgTypeIdx)); Region.AggArgToExtracted.insert( - std::make_pair(Group.ArgumentTypes.size() - 1, OriginalIndex)); - Region.GVNStores.push_back(GlobalValue); + std::make_pair(ArgTypeIdx, OriginalIndex)); + AggArgIdx = ArgTypeIdx; + } + + // TODO: Adapt to the extra input from the PHINode. + PHINode *PN = dyn_cast(Output); + + Optional GVN; + if (PN && !BlocksInRegion.contains(PN->getParent())) { + // Values outside the region can be combined into PHINode when we + // have multiple exits. We collect both of these into a list to identify + // which values are being used in the PHINode. Each list identifies a + // different PHINode, and a different output. We store the PHINode as its + // own canonical value. These canonical values are also dependent on the + // output argument it is saved to. + + // If two PHINodes have the same canonical values, but different aggregate + // argument locations, then they will have distinct Canonical Values. + GVN = getGVNForPHINode(Region, PN, AggArgIdx); + if (!GVN.hasValue()) + return; + } else { + // If we do not have a PHINode we use the global value numbering for the + // output value, to find the canonical number to add to the set of stored + // values. + GVN = C.getGVN(Output); + GVN = C.getCanonicalNum(*GVN); } - stable_sort(Region.GVNStores); + // Each region has a potentially unique set of outputs. We save which + // values are output in a list of canonical values so we can differentiate + // among the different store schemes. + Region.GVNStores.push_back(*GVN); + OriginalIndex++; TypeIndex++; } + + // We sort the stored values to make sure that we are not affected by analysis + // order when determining what combination of items were stored. 
+ stable_sort(Region.GVNStores); } void IROutliner::findAddInputsOutputs(Module &M, OutlinableRegion &Region, @@ -1065,6 +1326,214 @@ return Call; } +/// Find or create a BasicBlock in the outlined function containing PhiBlocks +/// for \p RetVal. +/// +/// \param Group - The OutlinableGroup containing the information about the +/// overall outlined function. +/// \param RetVal - The return value or exit option that we are currently +/// evaluating. +/// \returns The found or newly created BasicBlock to contain the needed +/// PHINodes to be used as outputs. +static BasicBlock *findOrCreatePHIBlock(OutlinableGroup &Group, Value *RetVal) { + DenseMap::iterator PhiBlockForRetVal, + ReturnBlockForRetVal; + PhiBlockForRetVal = Group.PHIBlocks.find(RetVal); + ReturnBlockForRetVal = Group.EndBBs.find(RetVal); + assert(ReturnBlockForRetVal != Group.EndBBs.end() && + "Could not find output value!"); + BasicBlock *ReturnBB = ReturnBlockForRetVal->second; + + // Find if a PHIBlock exists for this return value already. If it is + // the first time we are analyzing this, we will not, so we record it. + PhiBlockForRetVal = Group.PHIBlocks.find(RetVal); + if (PhiBlockForRetVal != Group.PHIBlocks.end()) + return PhiBlockForRetVal->second; + + // If we did not find a block, we create one, and insert it into the + // overall function and record it. + bool Inserted = false; + BasicBlock *PHIBlock = BasicBlock::Create(ReturnBB->getContext(), "phi_block", + ReturnBB->getParent()); + std::tie(PhiBlockForRetVal, Inserted) = + Group.PHIBlocks.insert(std::make_pair(RetVal, PHIBlock)); + + // We find the predecessors of the return block in the newly created outlined + // function in order to point them to the new PHIBlock rather than the already + // existing return block. 
+ SmallVector BranchesToChange; + for (BasicBlock *Pred : predecessors(ReturnBB)) + BranchesToChange.push_back(cast(Pred->getTerminator())); + + // Now we mark the branch instructions found, and change the references of the + // return block to the newly created PHIBlock. + for (BranchInst *BI : BranchesToChange) + for (unsigned Succ = 0, End = BI->getNumSuccessors(); Succ < End; Succ++) { + if (BI->getSuccessor(Succ) != ReturnBB) + continue; + BI->setSuccessor(Succ, PHIBlock); + } + + BranchInst::Create(ReturnBB, PHIBlock); + + return PhiBlockForRetVal->second; +} + +/// For the function call now representing the \p Region, find the passed value +/// to that call that represents Argument \p A at the call location if the +/// call has already been replaced with a call to the overall, aggregate +/// function. +/// +/// \param A - The Argument to get the passed value for. +/// \param Region - The extracted Region corresponding to the outlined function. +/// \returns The Value representing \p A at the call site. +static Value * +getPassedArgumentInAlreadyOutlinedFunction(const Argument *A, + const OutlinableRegion &Region) { + // If we don't need to adjust the argument number at all (since the call + // has already been replaced by a call to the overall outlined function) + // we can just get the specified argument. + return Region.Call->getArgOperand(A->getArgNo()); +} + +/// For the function call now representing the \p Region, find the passed value +/// to that call that represents Argument \p A at the call location if the +/// call has only been replaced by the call to the aggregate function. +/// +/// \param A - The Argument to get the passed value for. +/// \param Region - The extracted Region corresponding to the outlined function. +/// \returns The Value representing \p A at the call site. 
+static Value * +getPassedArgumentAndAdjustArgumentLocation(const Argument *A, + const OutlinableRegion &Region) { + unsigned ArgNum = A->getArgNo(); + + // If it is a constant, we can look at our mapping from when we created + // the outputs to figure out what the constant value is. + if (Region.AggArgToConstant.count(ArgNum)) + return Region.AggArgToConstant.find(ArgNum)->second; + + // If it is not a constant, and we are not looking at the overall function, we + // need to adjust which argument we are looking at. + ArgNum = Region.AggArgToExtracted.find(ArgNum)->second; + return Region.Call->getArgOperand(ArgNum); +} + +/// Find the canonical numbering for the incoming Values into the PHINode \p PN. +/// +/// \param PN [in] - The PHINode that we are finding the canonical numbers for. +/// \param Region [in] - The OutlinableRegion containing \p PN. +/// \param OutputMappings [in] - The mapping of output values from outlined +/// region to their original values. +/// \param CanonNums [out] - The canonical numbering for the incoming values to +/// \p PN. +/// \param ReplacedWithOutlinedCall - A flag to use the extracted function call +/// of \p Region rather than the overall function's call. +static void +findCanonNumsForPHI(PHINode *PN, OutlinableRegion &Region, + const DenseMap &OutputMappings, + DenseSet &CanonNums, + bool ReplacedWithOutlinedCall = true) { + // Iterate over the incoming values. + for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) { + Value *IVal = PN->getIncomingValue(Idx); + // If we have an argument as incoming value, we need to grab the passed + // value from the call itself. + if (Argument *A = dyn_cast(IVal)) { + if (ReplacedWithOutlinedCall) + IVal = getPassedArgumentInAlreadyOutlinedFunction(A, Region); + else + IVal = getPassedArgumentAndAdjustArgumentLocation(A, Region); + } + + // Get the original value if it has been replaced by an output value. 
+ IVal = findOutputMapping(OutputMappings, IVal); + + // Find and add the canonical number for the incoming value. + Optional GVN = Region.Candidate->getGVN(IVal); + assert(GVN.hasValue() && "No GVN for incoming value"); + Optional CanonNum = Region.Candidate->getCanonicalNum(*GVN); + assert(CanonNum.hasValue() && "No Canonical Number for GVN"); + CanonNums.insert(*CanonNum); + } +} + +/// Find, or add PHINode \p PN to the combined PHINode Block \p OverallPHIBlock +/// in order to condense the number of instructions added to the outlined +/// function. +/// +/// \param PN [in] - The PHINode that we are finding the canonical numbers for. +/// \param Region [in] - The OutlinableRegion containing \p PN. +/// \param OverallPhiBlock [in] - The overall PHIBlock we are trying to find +/// \p PN in. +/// \param OutputMappings [in] - The mapping of output values from outlined +/// region to their original values. +/// \returns the newly found or created PHINode in \p OverallPhiBlock. +static PHINode* +findOrCreatePHIInBlock(PHINode &PN, OutlinableRegion &Region, +                       BasicBlock *OverallPhiBlock, +                       const DenseMap &OutputMappings) { + OutlinableGroup &Group = *Region.Parent; + + DenseSet PNCanonNums; + // We have to use the extracted function since we have not merged this region + // into the overall function yet. We make sure to reassign the argument + // numbering since it is possible that the argument ordering is different + // between the functions. + findCanonNumsForPHI(&PN, Region, OutputMappings, PNCanonNums, + /* ReplacedWithOutlinedCall = */ false); + + OutlinableRegion *FirstRegion = Group.Regions[0]; + DenseSet CurrentCanonNums; + // Find the Canonical Numbering for each PHINode, if it matches, we replace + // the uses of the PHINode we are searching for, with the found PHINode. 
+ for (PHINode &CurrPN : OverallPhiBlock->phis()) { + CurrentCanonNums.clear(); + findCanonNumsForPHI(&CurrPN, *FirstRegion, OutputMappings, CurrentCanonNums, + /* ReplacedWithOutlinedCall = */ true); + + if (all_of(PNCanonNums, [&CurrentCanonNums](unsigned CanonNum) { + return CurrentCanonNums.contains(CanonNum); + })) + return &CurrPN; + } + + // If we've made it here, it means we weren't able to replace the PHINode, so + // we must insert it ourselves. + PHINode *NewPN = cast(PN.clone()); + NewPN->insertBefore(&*OverallPhiBlock->begin()); + for (unsigned Idx = 0, Edx = NewPN->getNumIncomingValues(); Idx < Edx; + Idx++) { + Value *IncomingVal = NewPN->getIncomingValue(Idx); + BasicBlock *IncomingBlock = NewPN->getIncomingBlock(Idx); + + // Find corresponding basic block in the overall function for the incoming + // block. + Instruction *FirstNonPHI = IncomingBlock->getFirstNonPHI(); + assert(FirstNonPHI && "Incoming block is empty?"); + Value *CorrespondingVal = + Region.findCorrespondingValueIn(*FirstRegion, FirstNonPHI); + assert(CorrespondingVal && "Value is nullptr?"); + BasicBlock *BlockToUse = cast(CorrespondingVal)->getParent(); + NewPN->setIncomingBlock(Idx, BlockToUse); + + // If we have an argument we make sure we replace using the argument from + // the correct function. + if (Argument *A = dyn_cast(IncomingVal)) { + Value *Val = Group.OutlinedFunction->getArg(A->getArgNo()); + NewPN->setIncomingValue(Idx, Val); + continue; + } + + // Find the corresponding value in the overall function. + IncomingVal = findOutputMapping(OutputMappings, IncomingVal); + Value *Val = Region.findCorrespondingValueIn(*FirstRegion, IncomingVal); + assert(Val && "Value is nullptr?"); + NewPN->setIncomingValue(Idx, Val); + } + return NewPN; +} + // Within an extracted function, replace the argument uses of the extracted // region with the arguments of the function for an OutlinableGroup. 
// @@ -1077,6 +1546,7 @@ static void replaceArgumentUses(OutlinableRegion &Region, DenseMap &OutputBBs, + const DenseMap &OutputMappings, bool FirstFunction = false) { OutlinableGroup &Group = *Region.Parent; assert(Region.ExtractedFunction && "Region has no extracted function?"); @@ -1146,12 +1616,46 @@ LLVM_DEBUG(dbgs() << "Move store for instruction " << *I << " to " << *OutputBB << "\n"); - if (FirstFunction) + // If this is storing a PHINode, we must make sure it is included in the + // overall function. + if (!isa(ValueOperand)) { + if (FirstFunction) + continue; + Value *CorrVal = + Region.findCorrespondingValueIn(*Group.Regions[0], ValueOperand); + assert(CorrVal && "Value is nullptr?"); + NewI->setOperand(0, CorrVal); + continue; + } + PHINode *PN = cast(SI->getValueOperand()); + // If it has a value, it was not split by the code extractor, which + // is what we are looking for. + if (Region.Candidate->getGVN(PN).hasValue()) continue; - Value *CorrVal = - Region.findCorrespondingValueIn(*Group.Regions[0], ValueOperand); - assert(CorrVal && "Value is nullptr?"); - NewI->setOperand(0, CorrVal); + + // We record the parent block for the PHINode in the Region so that + // we can exclude it from checks later on. + Region.PHIBlocks.insert(std::make_pair(RetVal, PN->getParent())); + + // If this is the first function, we do not need to worry about merging + // this with any other block in the overall outlined function, so we can + // just continue. + if (FirstFunction) { + BasicBlock *PHIBlock = PN->getParent(); + Group.PHIBlocks.insert(std::make_pair(RetVal, PHIBlock)); + continue; + } + + // We look for the aggregate block that contains the PHINodes leading into + // this exit path. If we can't find one, we create one. + BasicBlock *OverallPhiBlock = findOrCreatePHIBlock(Group, RetVal); + + // For our PHINode, we find the combined canonical numbering, and + // attempt to find a matching PHINode in the overall PHIBlock. 
If we + // cannot, we copy the PHINode and move it into this new block. + PHINode *NewPN = + findOrCreatePHIInBlock(*PN, Region, OverallPhiBlock, OutputMappings); + NewI->setOperand(0, NewPN); } // If we added an edge for basic blocks without a predecessor, we remove it @@ -1392,7 +1896,12 @@ Module &M, OutlinableGroup &OG, DenseMap &EndBBs, std::vector> &OutputStoreBBs) { // We only need the switch statement if there is more than one store - // combination. + // combination, or there is more than one set of output blocks. The first + // will occur when we store different sets of values for two different + // regions. The second will occur when we have two outputs that are combined + // in a PHINode outside of the region in one outlined instance, and are used + // seaparately in another. This will create the same set of OutputGVNs, but + // will generate two different output schemes. if (OG.OutputGVNCombinations.size() > 1) { Function *AggFunc = OG.OutlinedFunction; // Create a final block for each different return block. @@ -1435,8 +1944,14 @@ return; } + assert(OutputStoreBBs.size() < 2 && "Different store sets not handled!"); + // If there needs to be stores, move them from the output blocks to their - // corresponding ending block. + // corresponding ending block. We do not check that the OutputGVNCombinations + // is equal to 1 here since that could just been the case where there are 0 + // outputs. Instead, we check whether there is more than one set of output + // blocks since this is the only case where we would have to move the + // stores, and erase the extraneous blocks. if (OutputStoreBBs.size() == 1) { LLVM_DEBUG(dbgs() << "Move store instructions to the end block in " << *OG.OutlinedFunction << "\n"); @@ -1468,10 +1983,13 @@ /// set of stores needed for the different functions. /// \param [in,out] FuncsToRemove - Extracted functions to erase from module /// once outlining is complete. 
+/// \param [in] OutputMappings - The mapping of output values from outlined +/// region to their original values. static void fillOverallFunction( Module &M, OutlinableGroup &CurrentGroup, std::vector> &OutputStoreBBs, - std::vector &FuncsToRemove) { + std::vector &FuncsToRemove, + const DenseMap &OutputMappings) { OutlinableRegion *CurrentOS = CurrentGroup.Regions[0]; // Move first extracted function's instructions into new function. @@ -1491,7 +2009,7 @@ CurrentGroup.OutlinedFunction, "output_block_0"); CurrentOS->OutputBlockNum = 0; - replaceArgumentUses(*CurrentOS, NewBBs, true); + replaceArgumentUses(*CurrentOS, NewBBs, OutputMappings, true); replaceConstants(*CurrentOS); // We first identify if any output blocks are empty, if they are we remove @@ -1525,7 +2043,8 @@ OutlinableRegion *CurrentOS; - fillOverallFunction(M, CurrentGroup, OutputStoreBBs, FuncsToRemove); + fillOverallFunction(M, CurrentGroup, OutputStoreBBs, FuncsToRemove, + OutputMappings); std::vector SortedKeys; for (unsigned Idx = 1; Idx < CurrentGroup.Regions.size(); Idx++) { @@ -1539,8 +2058,7 @@ createAndInsertBasicBlocks( CurrentGroup.EndBBs, NewBBs, CurrentGroup.OutlinedFunction, "output_block_" + Twine(static_cast(Idx))); - - replaceArgumentUses(*CurrentOS, NewBBs); + replaceArgumentUses(*CurrentOS, NewBBs, OutputMappings); alignOutputBlockWithAggFunc(CurrentGroup, *CurrentOS, NewBBs, CurrentGroup.EndBBs, OutputMappings, OutputStoreBBs); @@ -1708,6 +2226,34 @@ return RegionBenefit; } +/// For the \p OutputCanon number passed in find the value represented by this +/// canonical number. If it is from a PHINode, we pick the first incoming +/// value and return that Value instead. +/// +/// \param Region - The OutlinableRegion to get the Value from. +/// \param OutputCanon - The canonical number to find the Value from. +/// \returns The Value represented by a canonical number \p OutputCanon in \p +/// Region. 
+static Value *findOutputValueInRegion(OutlinableRegion &Region, + unsigned OutputCanon) { + OutlinableGroup &CurrentGroup = *Region.Parent; + // If the value is greater than the value in the tracker, we have a + // PHINode and will instead use one of the incoming values to find the + // type. + if (OutputCanon > CurrentGroup.PHINodeGVNTracker) { + auto It = CurrentGroup.PHINodeGVNToGVNs.find(OutputCanon); + assert(It != CurrentGroup.PHINodeGVNToGVNs.end() && + "Could not find GVN set for PHINode number!"); + assert(It->second.second.size() > 0 && "PHINode does not have any values!"); + OutputCanon = *It->second.second.begin(); + } + Optional OGVN = Region.Candidate->fromCanonicalNum(OutputCanon); + assert(OGVN.hasValue() && "Could not find GVN for Canonical Number?"); + Optional OV = Region.Candidate->fromGVN(*OGVN); + assert(OV.hasValue() && "Could not find value for GVN?"); + return *OV; +} + InstructionCost IROutliner::findCostOutputReloads(OutlinableGroup &CurrentGroup) { InstructionCost OverallCost = 0; @@ -1715,10 +2261,8 @@ TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent()); // Each output incurs a load after the call, so we add that to the cost. 
- for (unsigned OutputGVN : Region->GVNStores) { - Optional OV = Region->Candidate->fromGVN(OutputGVN); - assert(OV.hasValue() && "Could not find value for GVN?"); - Value *V = OV.getValue(); + for (unsigned OutputCanon : Region->GVNStores) { + Value *V = findOutputValueInRegion(*Region, OutputCanon); InstructionCost LoadCost = TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0, TargetTransformInfo::TCK_CodeSize); @@ -1747,6 +2291,7 @@ InstructionCost OutputCost = 0; unsigned NumOutputBranches = 0; + OutlinableRegion &FirstRegion = *CurrentGroup.Regions[0]; IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate; DenseSet CandidateBlocks; Candidate.getBasicBlocks(CandidateBlocks); @@ -1772,10 +2317,8 @@ for (const ArrayRef &OutputUse : CurrentGroup.OutputGVNCombinations) { - for (unsigned GVN : OutputUse) { - Optional OV = Candidate.fromGVN(GVN); - assert(OV.hasValue() && "Could not find value for GVN?"); - Value *V = OV.getValue(); + for (unsigned OutputCanon : OutputUse) { + Value *V = findOutputValueInRegion(FirstRegion, OutputCanon); InstructionCost StoreCost = TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0, TargetTransformInfo::TCK_CodeSize); @@ -2035,8 +2578,8 @@ continue; SmallVector BE; - DenseSet BBSet; - OS->Candidate->getBasicBlocks(BBSet, BE); + DenseSet BlocksInRegion; + OS->Candidate->getBasicBlocks(BlocksInRegion, BE); OS->CE = new (ExtractorAllocator.Allocate()) CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false, false, "outlined"); @@ -2146,8 +2689,8 @@ OutlinedRegions.clear(); for (OutlinableRegion *OS : CurrentGroup.Regions) { SmallVector BE; - DenseSet BBSet; - OS->Candidate->getBasicBlocks(BBSet, BE); + DenseSet BlocksInRegion; + OS->Candidate->getBasicBlocks(BlocksInRegion, BE); OS->CE = new (ExtractorAllocator.Allocate()) CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false, false, "outlined"); diff --git a/llvm/test/Bindings/Go/go.test 
b/llvm/test/Bindings/Go/go.test --- a/llvm/test/Bindings/Go/go.test +++ b/llvm/test/Bindings/Go/go.test @@ -1,4 +1,5 @@ ; RUN: llvm-go test llvm.org/llvm/bindings/go/llvm ; REQUIRES: shell, default_triple -; UNSUPPORTED: asan, ubsan, msan +;; Building Go bindings with Clang is currently unsupported on AIX. +; UNSUPPORTED: asan, ubsan, msan, -aix diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll @@ -0,0 +1,2095 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BASE +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+dotprod %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-DOT + +define i32 @add_v4i32_v4i32(<4 x i32> %x) { +; CHECK-LABEL: add_v4i32_v4i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +entry: + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %x) + ret i32 %z +} + +define i64 @add_v4i32_v4i64_zext(<4 x i32> %x) { +; CHECK-LABEL: add_v4i32_v4i64_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v1.2d, v0.2s, #0 +; CHECK-NEXT: uaddw2 v0.2d, v1.2d, v0.4s +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %xx = zext <4 x i32> %x to <4 x i64> + %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx) + ret i64 %z +} + +define i64 @add_v4i32_v4i64_sext(<4 x i32> %x) { +; CHECK-LABEL: add_v4i32_v4i64_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sshll v1.2d, v0.2s, #0 +; CHECK-NEXT: saddw2 v0.2d, v1.2d, v0.4s +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %xx = sext <4 x i32> %x to <4 x i64> + %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx) + ret i64 %z +} + +define i64 @add_v2i32_v2i64_zext(<2 x i32> %x) { +; CHECK-LABEL: 
add_v2i32_v2i64_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %xx = zext <2 x i32> %x to <2 x i64> + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx) + ret i64 %z +} + +define i64 @add_v2i32_v2i64_sext(<2 x i32> %x) { +; CHECK-LABEL: add_v2i32_v2i64_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %xx = sext <2 x i32> %x to <2 x i64> + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx) + ret i64 %z +} + +define i32 @add_v8i16_v8i32_zext(<8 x i16> %x) { +; CHECK-LABEL: add_v8i16_v8i32_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v1.4s, v0.4h, #0 +; CHECK-NEXT: uaddw2 v0.4s, v1.4s, v0.8h +; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +entry: + %xx = zext <8 x i16> %x to <8 x i32> + %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx) + ret i32 %z +} + +define i32 @add_v8i16_v8i32_sext(<8 x i16> %x) { +; CHECK-LABEL: add_v8i16_v8i32_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sshll v1.4s, v0.4h, #0 +; CHECK-NEXT: saddw2 v0.4s, v1.4s, v0.8h +; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +entry: + %xx = sext <8 x i16> %x to <8 x i32> + %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx) + ret i32 %z +} + +define i32 @add_v4i16_v4i32_zext(<4 x i16> %x) { +; CHECK-LABEL: add_v4i16_v4i32_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +entry: + %xx = zext <4 x i16> %x to <4 x i32> + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx) + ret i32 %z +} + +define i32 @add_v4i16_v4i32_sext(<4 x i16> %x) { +; CHECK-LABEL: add_v4i16_v4i32_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-NEXT: addv s0, v0.4s +; 
CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +entry: + %xx = sext <4 x i16> %x to <4 x i32> + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx) + ret i32 %z +} + +define zeroext i16 @add_v8i16_v8i16(<8 x i16> %x) { +; CHECK-LABEL: add_v8i16_v8i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: addv h0, v0.8h +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +entry: + %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %x) + ret i16 %z +} + +define i64 @add_v8i16_v8i64_zext(<8 x i16> %x) { +; CHECK-LABEL: add_v8i16_v8i64_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: uaddl2 v2.2d, v0.4s, v1.4s +; CHECK-NEXT: uaddl v0.2d, v0.2s, v1.2s +; CHECK-NEXT: add v0.2d, v0.2d, v2.2d +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %xx = zext <8 x i16> %x to <8 x i64> + %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx) + ret i64 %z +} + +define i64 @add_v8i16_v8i64_sext(<8 x i16> %x) { +; CHECK-LABEL: add_v8i16_v8i64_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sshll2 v1.4s, v0.8h, #0 +; CHECK-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-NEXT: saddl2 v2.2d, v0.4s, v1.4s +; CHECK-NEXT: saddl v0.2d, v0.2s, v1.2s +; CHECK-NEXT: add v0.2d, v0.2d, v2.2d +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %xx = sext <8 x i16> %x to <8 x i64> + %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx) + ret i64 %z +} + +define i64 @add_v4i16_v4i64_zext(<4 x i16> %x) { +; CHECK-LABEL: add_v4i16_v4i64_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: ushll v1.2d, v0.2s, #0 +; CHECK-NEXT: uaddw2 v0.2d, v1.2d, v0.4s +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %xx = zext <4 x i16> %x to <4 x i64> + %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx) + ret i64 %z +} + +define i64 @add_v4i16_v4i64_sext(<4 x i16> %x) { +; CHECK-LABEL: 
add_v4i16_v4i64_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-NEXT: sshll v1.2d, v0.2s, #0 +; CHECK-NEXT: saddw2 v0.2d, v1.2d, v0.4s +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %xx = sext <4 x i16> %x to <4 x i64> + %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx) + ret i64 %z +} + +define i64 @add_v2i16_v2i64_zext(<2 x i16> %x) { +; CHECK-LABEL: add_v2i16_v2i64_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: movi d1, #0x00ffff0000ffff +; CHECK-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %xx = zext <2 x i16> %x to <2 x i64> + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx) + ret i64 %z +} + +define i64 @add_v2i16_v2i64_sext(<2 x i16> %x) { +; CHECK-LABEL: add_v2i16_v2i64_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-NEXT: shl v0.2d, v0.2d, #48 +; CHECK-NEXT: sshr v0.2d, v0.2d, #48 +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %xx = sext <2 x i16> %x to <2 x i64> + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx) + ret i64 %z +} + +define i32 @add_v16i8_v16i32_zext(<16 x i8> %x) { +; CHECK-BASE-LABEL: add_v16i8_v16i32_zext: +; CHECK-BASE: // %bb.0: // %entry +; CHECK-BASE-NEXT: ushll2 v1.8h, v0.16b, #0 +; CHECK-BASE-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-BASE-NEXT: uaddl2 v2.4s, v0.8h, v1.8h +; CHECK-BASE-NEXT: uaddl v0.4s, v0.4h, v1.4h +; CHECK-BASE-NEXT: add v0.4s, v0.4s, v2.4s +; CHECK-BASE-NEXT: addv s0, v0.4s +; CHECK-BASE-NEXT: fmov w0, s0 +; CHECK-BASE-NEXT: ret +; +; CHECK-DOT-LABEL: add_v16i8_v16i32_zext: +; CHECK-DOT: // %bb.0: // %entry +; CHECK-DOT-NEXT: movi v1.16b, #1 +; CHECK-DOT-NEXT: movi v2.2d, #0000000000000000 +; CHECK-DOT-NEXT: udot v2.4s, v0.16b, v1.16b +; CHECK-DOT-NEXT: addv s0, v2.4s +; CHECK-DOT-NEXT: fmov w0, s0 +; CHECK-DOT-NEXT: ret +entry: + 
%xx = zext <16 x i8> %x to <16 x i32> + %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx) + ret i32 %z +} + +define i32 @add_v16i8_v16i32_sext(<16 x i8> %x) { +; CHECK-BASE-LABEL: add_v16i8_v16i32_sext: +; CHECK-BASE: // %bb.0: // %entry +; CHECK-BASE-NEXT: sshll2 v1.8h, v0.16b, #0 +; CHECK-BASE-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-BASE-NEXT: saddl2 v2.4s, v0.8h, v1.8h +; CHECK-BASE-NEXT: saddl v0.4s, v0.4h, v1.4h +; CHECK-BASE-NEXT: add v0.4s, v0.4s, v2.4s +; CHECK-BASE-NEXT: addv s0, v0.4s +; CHECK-BASE-NEXT: fmov w0, s0 +; CHECK-BASE-NEXT: ret +; +; CHECK-DOT-LABEL: add_v16i8_v16i32_sext: +; CHECK-DOT: // %bb.0: // %entry +; CHECK-DOT-NEXT: movi v1.16b, #1 +; CHECK-DOT-NEXT: movi v2.2d, #0000000000000000 +; CHECK-DOT-NEXT: sdot v2.4s, v0.16b, v1.16b +; CHECK-DOT-NEXT: addv s0, v2.4s +; CHECK-DOT-NEXT: fmov w0, s0 +; CHECK-DOT-NEXT: ret +entry: + %xx = sext <16 x i8> %x to <16 x i32> + %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx) + ret i32 %z +} + +define i32 @add_v8i8_v8i32_zext(<8 x i8> %x) { +; CHECK-BASE-LABEL: add_v8i8_v8i32_zext: +; CHECK-BASE: // %bb.0: // %entry +; CHECK-BASE-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-BASE-NEXT: ushll v1.4s, v0.4h, #0 +; CHECK-BASE-NEXT: uaddw2 v0.4s, v1.4s, v0.8h +; CHECK-BASE-NEXT: addv s0, v0.4s +; CHECK-BASE-NEXT: fmov w0, s0 +; CHECK-BASE-NEXT: ret +; +; CHECK-DOT-LABEL: add_v8i8_v8i32_zext: +; CHECK-DOT: // %bb.0: // %entry +; CHECK-DOT-NEXT: movi v1.8b, #1 +; CHECK-DOT-NEXT: movi v2.2d, #0000000000000000 +; CHECK-DOT-NEXT: udot v2.2s, v0.8b, v1.8b +; CHECK-DOT-NEXT: addp v0.2s, v2.2s, v2.2s +; CHECK-DOT-NEXT: fmov w0, s0 +; CHECK-DOT-NEXT: ret +entry: + %xx = zext <8 x i8> %x to <8 x i32> + %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx) + ret i32 %z +} + +define i32 @add_v8i8_v8i32_sext(<8 x i8> %x) { +; CHECK-BASE-LABEL: add_v8i8_v8i32_sext: +; CHECK-BASE: // %bb.0: // %entry +; CHECK-BASE-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-BASE-NEXT: sshll v1.4s, v0.4h, #0 +; CHECK-BASE-NEXT: 
saddw2 v0.4s, v1.4s, v0.8h +; CHECK-BASE-NEXT: addv s0, v0.4s +; CHECK-BASE-NEXT: fmov w0, s0 +; CHECK-BASE-NEXT: ret +; +; CHECK-DOT-LABEL: add_v8i8_v8i32_sext: +; CHECK-DOT: // %bb.0: // %entry +; CHECK-DOT-NEXT: movi v1.8b, #1 +; CHECK-DOT-NEXT: movi v2.2d, #0000000000000000 +; CHECK-DOT-NEXT: sdot v2.2s, v0.8b, v1.8b +; CHECK-DOT-NEXT: addp v0.2s, v2.2s, v2.2s +; CHECK-DOT-NEXT: fmov w0, s0 +; CHECK-DOT-NEXT: ret +entry: + %xx = sext <8 x i8> %x to <8 x i32> + %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx) + ret i32 %z +} + +define i32 @add_v4i8_v4i32_zext(<4 x i8> %x) { +; CHECK-LABEL: add_v4i8_v4i32_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: bic v0.4h, #255, lsl #8 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +entry: + %xx = zext <4 x i8> %x to <4 x i32> + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx) + ret i32 %z +} + +define i32 @add_v4i8_v4i32_sext(<4 x i8> %x) { +; CHECK-LABEL: add_v4i8_v4i32_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: shl v0.4s, v0.4s, #24 +; CHECK-NEXT: sshr v0.4s, v0.4s, #24 +; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +entry: + %xx = sext <4 x i8> %x to <4 x i32> + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx) + ret i32 %z +} + +define zeroext i16 @add_v16i8_v16i16_zext(<16 x i8> %x) { +; CHECK-LABEL: add_v16i8_v16i16_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v1.8h, v0.8b, #0 +; CHECK-NEXT: uaddw2 v0.8h, v1.8h, v0.16b +; CHECK-NEXT: addv h0, v0.8h +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +entry: + %xx = zext <16 x i8> %x to <16 x i16> + %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx) + ret i16 %z +} + +define signext i16 @add_v16i8_v16i16_sext(<16 x i8> %x) { +; CHECK-LABEL: add_v16i8_v16i16_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sshll v1.8h, v0.8b, #0 +; CHECK-NEXT: saddw2 v0.8h, v1.8h, v0.16b +; 
CHECK-NEXT: addv h0, v0.8h +; CHECK-NEXT: smov w0, v0.h[0] +; CHECK-NEXT: ret +entry: + %xx = sext <16 x i8> %x to <16 x i16> + %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx) + ret i16 %z +} + +define zeroext i16 @add_v8i8_v8i16_zext(<8 x i8> %x) { +; CHECK-LABEL: add_v8i8_v8i16_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-NEXT: addv h0, v0.8h +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +entry: + %xx = zext <8 x i8> %x to <8 x i16> + %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx) + ret i16 %z +} + +define signext i16 @add_v8i8_v8i16_sext(<8 x i8> %x) { +; CHECK-LABEL: add_v8i8_v8i16_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-NEXT: addv h0, v0.8h +; CHECK-NEXT: smov w0, v0.h[0] +; CHECK-NEXT: ret +entry: + %xx = sext <8 x i8> %x to <8 x i16> + %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx) + ret i16 %z +} + +define zeroext i8 @add_v16i8_v16i8(<16 x i8> %x) { +; CHECK-LABEL: add_v16i8_v16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: addv b0, v0.16b +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +entry: + %z = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %x) + ret i8 %z +} + +define i64 @add_v16i8_v16i64_zext(<16 x i8> %x) { +; CHECK-LABEL: add_v16i8_v16i64_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll2 v1.8h, v0.16b, #0 +; CHECK-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-NEXT: ushll2 v2.4s, v1.8h, #0 +; CHECK-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-NEXT: ushll2 v3.4s, v0.8h, #0 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: uaddl2 v4.2d, v3.4s, v2.4s +; CHECK-NEXT: uaddl2 v5.2d, v0.4s, v1.4s +; CHECK-NEXT: uaddl v2.2d, v3.2s, v2.2s +; CHECK-NEXT: uaddl v0.2d, v0.2s, v1.2s +; CHECK-NEXT: add v1.2d, v5.2d, v4.2d +; CHECK-NEXT: add v0.2d, v0.2d, v2.2d +; CHECK-NEXT: add v0.2d, v0.2d, v1.2d +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %xx = zext <16 x i8> %x to <16 x i64> + %z = call i64 
@llvm.vector.reduce.add.v16i64(<16 x i64> %xx) + ret i64 %z +} + +define i64 @add_v16i8_v16i64_sext(<16 x i8> %x) { +; CHECK-LABEL: add_v16i8_v16i64_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sshll2 v1.8h, v0.16b, #0 +; CHECK-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-NEXT: sshll2 v2.4s, v1.8h, #0 +; CHECK-NEXT: sshll v1.4s, v1.4h, #0 +; CHECK-NEXT: sshll2 v3.4s, v0.8h, #0 +; CHECK-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-NEXT: saddl2 v4.2d, v3.4s, v2.4s +; CHECK-NEXT: saddl2 v5.2d, v0.4s, v1.4s +; CHECK-NEXT: saddl v2.2d, v3.2s, v2.2s +; CHECK-NEXT: saddl v0.2d, v0.2s, v1.2s +; CHECK-NEXT: add v1.2d, v5.2d, v4.2d +; CHECK-NEXT: add v0.2d, v0.2d, v2.2d +; CHECK-NEXT: add v0.2d, v0.2d, v1.2d +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %xx = sext <16 x i8> %x to <16 x i64> + %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx) + ret i64 %z +} + +define i64 @add_v8i8_v8i64_zext(<8 x i8> %x) { +; CHECK-LABEL: add_v8i8_v8i64_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: uaddl2 v2.2d, v0.4s, v1.4s +; CHECK-NEXT: uaddl v0.2d, v0.2s, v1.2s +; CHECK-NEXT: add v0.2d, v0.2d, v2.2d +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %xx = zext <8 x i8> %x to <8 x i64> + %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx) + ret i64 %z +} + +define i64 @add_v8i8_v8i64_sext(<8 x i8> %x) { +; CHECK-LABEL: add_v8i8_v8i64_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-NEXT: sshll2 v1.4s, v0.8h, #0 +; CHECK-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-NEXT: saddl2 v2.2d, v0.4s, v1.4s +; CHECK-NEXT: saddl v0.2d, v0.2s, v1.2s +; CHECK-NEXT: add v0.2d, v0.2d, v2.2d +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %xx = sext <8 x i8> %x to <8 x i64> + %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx) + ret 
i64 %z +} + +define i64 @add_v4i8_v4i64_zext(<4 x i8> %x) { +; CHECK-LABEL: add_v4i8_v4i64_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: bic v0.4h, #255, lsl #8 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: ushll v1.2d, v0.2s, #0 +; CHECK-NEXT: uaddw2 v0.2d, v1.2d, v0.4s +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %xx = zext <4 x i8> %x to <4 x i64> + %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx) + ret i64 %z +} + +define i64 @add_v4i8_v4i64_sext(<4 x i8> %x) { +; CHECK-LABEL: add_v4i8_v4i64_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: ushll v1.2d, v0.2s, #0 +; CHECK-NEXT: ushll2 v0.2d, v0.4s, #0 +; CHECK-NEXT: shl v1.2d, v1.2d, #56 +; CHECK-NEXT: shl v0.2d, v0.2d, #56 +; CHECK-NEXT: sshr v1.2d, v1.2d, #56 +; CHECK-NEXT: ssra v1.2d, v0.2d, #56 +; CHECK-NEXT: addp d0, v1.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %xx = sext <4 x i8> %x to <4 x i64> + %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx) + ret i64 %z +} + +define i64 @add_v2i8_v2i64_zext(<2 x i8> %x) { +; CHECK-LABEL: add_v2i8_v2i64_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: movi d1, #0x0000ff000000ff +; CHECK-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %xx = zext <2 x i8> %x to <2 x i64> + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx) + ret i64 %z +} + +define i64 @add_v2i8_v2i64_sext(<2 x i8> %x) { +; CHECK-LABEL: add_v2i8_v2i64_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-NEXT: shl v0.2d, v0.2d, #56 +; CHECK-NEXT: sshr v0.2d, v0.2d, #56 +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %xx = sext <2 x i8> %x to <2 x i64> + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx) + ret i64 %z +} + +define i64 @add_v2i64_v2i64(<2 x i64> %x) { +; CHECK-LABEL: 
add_v2i64_v2i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %x) + ret i64 %z +} + +define i32 @add_v4i32_v4i32_acc(<4 x i32> %x, i32 %a) { +; CHECK-LABEL: add_v4i32_v4i32_acc: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: add w0, w8, w0 +; CHECK-NEXT: ret +entry: + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %x) + %r = add i32 %z, %a + ret i32 %r +} + +define i64 @add_v4i32_v4i64_acc_zext(<4 x i32> %x, i64 %a) { +; CHECK-LABEL: add_v4i32_v4i64_acc_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v1.2d, v0.2s, #0 +; CHECK-NEXT: uaddw2 v0.2d, v1.2d, v0.4s +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: add x0, x8, x0 +; CHECK-NEXT: ret +entry: + %xx = zext <4 x i32> %x to <4 x i64> + %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx) + %r = add i64 %z, %a + ret i64 %r +} + +define i64 @add_v4i32_v4i64_acc_sext(<4 x i32> %x, i64 %a) { +; CHECK-LABEL: add_v4i32_v4i64_acc_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sshll v1.2d, v0.2s, #0 +; CHECK-NEXT: saddw2 v0.2d, v1.2d, v0.4s +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: add x0, x8, x0 +; CHECK-NEXT: ret +entry: + %xx = sext <4 x i32> %x to <4 x i64> + %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx) + %r = add i64 %z, %a + ret i64 %r +} + +define i64 @add_v2i32_v2i64_acc_zext(<2 x i32> %x, i64 %a) { +; CHECK-LABEL: add_v2i32_v2i64_acc_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: add x0, x8, x0 +; CHECK-NEXT: ret +entry: + %xx = zext <2 x i32> %x to <2 x i64> + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx) + %r = add i64 %z, %a + ret i64 %r +} + +define i64 @add_v2i32_v2i64_acc_sext(<2 x i32> %x, i64 %a) { +; CHECK-LABEL: 
add_v2i32_v2i64_acc_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: add x0, x8, x0 +; CHECK-NEXT: ret +entry: + %xx = sext <2 x i32> %x to <2 x i64> + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx) + %r = add i64 %z, %a + ret i64 %r +} + +define i32 @add_v8i16_v8i32_acc_zext(<8 x i16> %x, i32 %a) { +; CHECK-LABEL: add_v8i16_v8i32_acc_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v1.4s, v0.4h, #0 +; CHECK-NEXT: uaddw2 v0.4s, v1.4s, v0.8h +; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: add w0, w8, w0 +; CHECK-NEXT: ret +entry: + %xx = zext <8 x i16> %x to <8 x i32> + %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx) + %r = add i32 %z, %a + ret i32 %r +} + +define i32 @add_v8i16_v8i32_acc_sext(<8 x i16> %x, i32 %a) { +; CHECK-LABEL: add_v8i16_v8i32_acc_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sshll v1.4s, v0.4h, #0 +; CHECK-NEXT: saddw2 v0.4s, v1.4s, v0.8h +; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: add w0, w8, w0 +; CHECK-NEXT: ret +entry: + %xx = sext <8 x i16> %x to <8 x i32> + %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx) + %r = add i32 %z, %a + ret i32 %r +} + +define i32 @add_v4i16_v4i32_acc_zext(<4 x i16> %x, i32 %a) { +; CHECK-LABEL: add_v4i16_v4i32_acc_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: add w0, w8, w0 +; CHECK-NEXT: ret +entry: + %xx = zext <4 x i16> %x to <4 x i32> + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx) + %r = add i32 %z, %a + ret i32 %r +} + +define i32 @add_v4i16_v4i32_acc_sext(<4 x i16> %x, i32 %a) { +; CHECK-LABEL: add_v4i16_v4i32_acc_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: add w0, w8, w0 +; CHECK-NEXT: ret +entry: + 
%xx = sext <4 x i16> %x to <4 x i32> + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx) + %r = add i32 %z, %a + ret i32 %r +} + +define zeroext i16 @add_v8i16_v8i16_acc(<8 x i16> %x, i16 %a) { +; CHECK-LABEL: add_v8i16_v8i16_acc: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: addv h0, v0.8h +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: add w8, w8, w0 +; CHECK-NEXT: and w0, w8, #0xffff +; CHECK-NEXT: ret +entry: + %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %x) + %r = add i16 %z, %a + ret i16 %r +} + +define i64 @add_v8i16_v8i64_acc_zext(<8 x i16> %x, i64 %a) { +; CHECK-LABEL: add_v8i16_v8i64_acc_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: uaddl2 v2.2d, v0.4s, v1.4s +; CHECK-NEXT: uaddl v0.2d, v0.2s, v1.2s +; CHECK-NEXT: add v0.2d, v0.2d, v2.2d +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: add x0, x8, x0 +; CHECK-NEXT: ret +entry: + %xx = zext <8 x i16> %x to <8 x i64> + %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx) + %r = add i64 %z, %a + ret i64 %r +} + +define i64 @add_v8i16_v8i64_acc_sext(<8 x i16> %x, i64 %a) { +; CHECK-LABEL: add_v8i16_v8i64_acc_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sshll2 v1.4s, v0.8h, #0 +; CHECK-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-NEXT: saddl2 v2.2d, v0.4s, v1.4s +; CHECK-NEXT: saddl v0.2d, v0.2s, v1.2s +; CHECK-NEXT: add v0.2d, v0.2d, v2.2d +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: add x0, x8, x0 +; CHECK-NEXT: ret +entry: + %xx = sext <8 x i16> %x to <8 x i64> + %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx) + %r = add i64 %z, %a + ret i64 %r +} + +define i64 @add_v4i16_v4i64_acc_zext(<4 x i16> %x, i64 %a) { +; CHECK-LABEL: add_v4i16_v4i64_acc_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: ushll v1.2d, v0.2s, #0 +; CHECK-NEXT: uaddw2 v0.2d, v1.2d, v0.4s +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x8, 
d0 +; CHECK-NEXT: add x0, x8, x0 +; CHECK-NEXT: ret +entry: + %xx = zext <4 x i16> %x to <4 x i64> + %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx) + %r = add i64 %z, %a + ret i64 %r +} + +define i64 @add_v4i16_v4i64_acc_sext(<4 x i16> %x, i64 %a) { +; CHECK-LABEL: add_v4i16_v4i64_acc_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-NEXT: sshll v1.2d, v0.2s, #0 +; CHECK-NEXT: saddw2 v0.2d, v1.2d, v0.4s +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: add x0, x8, x0 +; CHECK-NEXT: ret +entry: + %xx = sext <4 x i16> %x to <4 x i64> + %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx) + %r = add i64 %z, %a + ret i64 %r +} + +define i64 @add_v2i16_v2i64_acc_zext(<2 x i16> %x, i64 %a) { +; CHECK-LABEL: add_v2i16_v2i64_acc_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: movi d1, #0x00ffff0000ffff +; CHECK-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: add x0, x8, x0 +; CHECK-NEXT: ret +entry: + %xx = zext <2 x i16> %x to <2 x i64> + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx) + %r = add i64 %z, %a + ret i64 %r +} + +define i64 @add_v2i16_v2i64_acc_sext(<2 x i16> %x, i64 %a) { +; CHECK-LABEL: add_v2i16_v2i64_acc_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-NEXT: shl v0.2d, v0.2d, #48 +; CHECK-NEXT: sshr v0.2d, v0.2d, #48 +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: add x0, x8, x0 +; CHECK-NEXT: ret +entry: + %xx = sext <2 x i16> %x to <2 x i64> + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx) + %r = add i64 %z, %a + ret i64 %r +} + +define i32 @add_v16i8_v16i32_acc_zext(<16 x i8> %x, i32 %a) { +; CHECK-BASE-LABEL: add_v16i8_v16i32_acc_zext: +; CHECK-BASE: // %bb.0: // %entry +; CHECK-BASE-NEXT: ushll2 v1.8h, v0.16b, #0 +; CHECK-BASE-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-BASE-NEXT: uaddl2 v2.4s, v0.8h, v1.8h +; 
CHECK-BASE-NEXT: uaddl v0.4s, v0.4h, v1.4h +; CHECK-BASE-NEXT: add v0.4s, v0.4s, v2.4s +; CHECK-BASE-NEXT: addv s0, v0.4s +; CHECK-BASE-NEXT: fmov w8, s0 +; CHECK-BASE-NEXT: add w0, w8, w0 +; CHECK-BASE-NEXT: ret +; +; CHECK-DOT-LABEL: add_v16i8_v16i32_acc_zext: +; CHECK-DOT: // %bb.0: // %entry +; CHECK-DOT-NEXT: movi v1.16b, #1 +; CHECK-DOT-NEXT: movi v2.2d, #0000000000000000 +; CHECK-DOT-NEXT: udot v2.4s, v0.16b, v1.16b +; CHECK-DOT-NEXT: addv s0, v2.4s +; CHECK-DOT-NEXT: fmov w8, s0 +; CHECK-DOT-NEXT: add w0, w8, w0 +; CHECK-DOT-NEXT: ret +entry: + %xx = zext <16 x i8> %x to <16 x i32> + %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx) + %r = add i32 %z, %a + ret i32 %r +} + +define i32 @add_v16i8_v16i32_acc_sext(<16 x i8> %x, i32 %a) { +; CHECK-BASE-LABEL: add_v16i8_v16i32_acc_sext: +; CHECK-BASE: // %bb.0: // %entry +; CHECK-BASE-NEXT: sshll2 v1.8h, v0.16b, #0 +; CHECK-BASE-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-BASE-NEXT: saddl2 v2.4s, v0.8h, v1.8h +; CHECK-BASE-NEXT: saddl v0.4s, v0.4h, v1.4h +; CHECK-BASE-NEXT: add v0.4s, v0.4s, v2.4s +; CHECK-BASE-NEXT: addv s0, v0.4s +; CHECK-BASE-NEXT: fmov w8, s0 +; CHECK-BASE-NEXT: add w0, w8, w0 +; CHECK-BASE-NEXT: ret +; +; CHECK-DOT-LABEL: add_v16i8_v16i32_acc_sext: +; CHECK-DOT: // %bb.0: // %entry +; CHECK-DOT-NEXT: movi v1.16b, #1 +; CHECK-DOT-NEXT: movi v2.2d, #0000000000000000 +; CHECK-DOT-NEXT: sdot v2.4s, v0.16b, v1.16b +; CHECK-DOT-NEXT: addv s0, v2.4s +; CHECK-DOT-NEXT: fmov w8, s0 +; CHECK-DOT-NEXT: add w0, w8, w0 +; CHECK-DOT-NEXT: ret +entry: + %xx = sext <16 x i8> %x to <16 x i32> + %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx) + %r = add i32 %z, %a + ret i32 %r +} + +define i32 @add_v8i8_v8i32_acc_zext(<8 x i8> %x, i32 %a) { +; CHECK-BASE-LABEL: add_v8i8_v8i32_acc_zext: +; CHECK-BASE: // %bb.0: // %entry +; CHECK-BASE-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-BASE-NEXT: ushll v1.4s, v0.4h, #0 +; CHECK-BASE-NEXT: uaddw2 v0.4s, v1.4s, v0.8h +; CHECK-BASE-NEXT: addv s0, v0.4s +; 
CHECK-BASE-NEXT: fmov w8, s0 +; CHECK-BASE-NEXT: add w0, w8, w0 +; CHECK-BASE-NEXT: ret +; +; CHECK-DOT-LABEL: add_v8i8_v8i32_acc_zext: +; CHECK-DOT: // %bb.0: // %entry +; CHECK-DOT-NEXT: movi v1.8b, #1 +; CHECK-DOT-NEXT: movi v2.2d, #0000000000000000 +; CHECK-DOT-NEXT: udot v2.2s, v0.8b, v1.8b +; CHECK-DOT-NEXT: addp v0.2s, v2.2s, v2.2s +; CHECK-DOT-NEXT: fmov w8, s0 +; CHECK-DOT-NEXT: add w0, w8, w0 +; CHECK-DOT-NEXT: ret +entry: + %xx = zext <8 x i8> %x to <8 x i32> + %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx) + %r = add i32 %z, %a + ret i32 %r +} + +define i32 @add_v8i8_v8i32_acc_sext(<8 x i8> %x, i32 %a) { +; CHECK-BASE-LABEL: add_v8i8_v8i32_acc_sext: +; CHECK-BASE: // %bb.0: // %entry +; CHECK-BASE-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-BASE-NEXT: sshll v1.4s, v0.4h, #0 +; CHECK-BASE-NEXT: saddw2 v0.4s, v1.4s, v0.8h +; CHECK-BASE-NEXT: addv s0, v0.4s +; CHECK-BASE-NEXT: fmov w8, s0 +; CHECK-BASE-NEXT: add w0, w8, w0 +; CHECK-BASE-NEXT: ret +; +; CHECK-DOT-LABEL: add_v8i8_v8i32_acc_sext: +; CHECK-DOT: // %bb.0: // %entry +; CHECK-DOT-NEXT: movi v1.8b, #1 +; CHECK-DOT-NEXT: movi v2.2d, #0000000000000000 +; CHECK-DOT-NEXT: sdot v2.2s, v0.8b, v1.8b +; CHECK-DOT-NEXT: addp v0.2s, v2.2s, v2.2s +; CHECK-DOT-NEXT: fmov w8, s0 +; CHECK-DOT-NEXT: add w0, w8, w0 +; CHECK-DOT-NEXT: ret +entry: + %xx = sext <8 x i8> %x to <8 x i32> + %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx) + %r = add i32 %z, %a + ret i32 %r +} + +define i32 @add_v4i8_v4i32_acc_zext(<4 x i8> %x, i32 %a) { +; CHECK-LABEL: add_v4i8_v4i32_acc_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: bic v0.4h, #255, lsl #8 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: add w0, w8, w0 +; CHECK-NEXT: ret +entry: + %xx = zext <4 x i8> %x to <4 x i32> + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx) + %r = add i32 %z, %a + ret i32 %r +} + +define i32 @add_v4i8_v4i32_acc_sext(<4 x i8> %x, i32 %a) { +; 
CHECK-LABEL: add_v4i8_v4i32_acc_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: shl v0.4s, v0.4s, #24 +; CHECK-NEXT: sshr v0.4s, v0.4s, #24 +; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: add w0, w8, w0 +; CHECK-NEXT: ret +entry: + %xx = sext <4 x i8> %x to <4 x i32> + %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx) + %r = add i32 %z, %a + ret i32 %r +} + +define zeroext i16 @add_v16i8_v16i16_acc_zext(<16 x i8> %x, i16 %a) { +; CHECK-LABEL: add_v16i8_v16i16_acc_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v1.8h, v0.8b, #0 +; CHECK-NEXT: uaddw2 v0.8h, v1.8h, v0.16b +; CHECK-NEXT: addv h0, v0.8h +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: add w8, w8, w0 +; CHECK-NEXT: and w0, w8, #0xffff +; CHECK-NEXT: ret +entry: + %xx = zext <16 x i8> %x to <16 x i16> + %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx) + %r = add i16 %z, %a + ret i16 %r +} + +define signext i16 @add_v16i8_v16i16_acc_sext(<16 x i8> %x, i16 %a) { +; CHECK-LABEL: add_v16i8_v16i16_acc_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sshll v1.8h, v0.8b, #0 +; CHECK-NEXT: saddw2 v0.8h, v1.8h, v0.16b +; CHECK-NEXT: addv h0, v0.8h +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: add w8, w8, w0 +; CHECK-NEXT: sxth w0, w8 +; CHECK-NEXT: ret +entry: + %xx = sext <16 x i8> %x to <16 x i16> + %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx) + %r = add i16 %z, %a + ret i16 %r +} + +define zeroext i16 @add_v8i8_v8i16_acc_zext(<8 x i8> %x, i16 %a) { +; CHECK-LABEL: add_v8i8_v8i16_acc_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-NEXT: addv h0, v0.8h +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: add w8, w8, w0 +; CHECK-NEXT: and w0, w8, #0xffff +; CHECK-NEXT: ret +entry: + %xx = zext <8 x i8> %x to <8 x i16> + %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx) + %r = add i16 %z, %a + ret i16 %r +} + +define signext i16 @add_v8i8_v8i16_acc_sext(<8 x i8> %x, i16 %a) { +; 
CHECK-LABEL: add_v8i8_v8i16_acc_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-NEXT: addv h0, v0.8h +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: add w8, w8, w0 +; CHECK-NEXT: sxth w0, w8 +; CHECK-NEXT: ret +entry: + %xx = sext <8 x i8> %x to <8 x i16> + %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx) + %r = add i16 %z, %a + ret i16 %r +} + +define zeroext i8 @add_v16i8_v16i8_acc(<16 x i8> %x, i8 %a) { +; CHECK-LABEL: add_v16i8_v16i8_acc: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: addv b0, v0.16b +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: add w8, w8, w0 +; CHECK-NEXT: and w0, w8, #0xff +; CHECK-NEXT: ret +entry: + %z = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %x) + %r = add i8 %z, %a + ret i8 %r +} + +define i64 @add_v16i8_v16i64_acc_zext(<16 x i8> %x, i64 %a) { +; CHECK-LABEL: add_v16i8_v16i64_acc_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll2 v1.8h, v0.16b, #0 +; CHECK-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-NEXT: ushll2 v2.4s, v1.8h, #0 +; CHECK-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-NEXT: ushll2 v3.4s, v0.8h, #0 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: uaddl2 v4.2d, v3.4s, v2.4s +; CHECK-NEXT: uaddl2 v5.2d, v0.4s, v1.4s +; CHECK-NEXT: uaddl v2.2d, v3.2s, v2.2s +; CHECK-NEXT: uaddl v0.2d, v0.2s, v1.2s +; CHECK-NEXT: add v1.2d, v5.2d, v4.2d +; CHECK-NEXT: add v0.2d, v0.2d, v2.2d +; CHECK-NEXT: add v0.2d, v0.2d, v1.2d +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: add x0, x8, x0 +; CHECK-NEXT: ret +entry: + %xx = zext <16 x i8> %x to <16 x i64> + %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx) + %r = add i64 %z, %a + ret i64 %r +} + +define i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, i64 %a) { +; CHECK-LABEL: add_v16i8_v16i64_acc_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sshll2 v1.8h, v0.16b, #0 +; CHECK-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-NEXT: sshll2 v2.4s, v1.8h, #0 +; CHECK-NEXT: sshll v1.4s, v1.4h, #0 +; CHECK-NEXT: sshll2 v3.4s, v0.8h, #0 
+; CHECK-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-NEXT: saddl2 v4.2d, v3.4s, v2.4s +; CHECK-NEXT: saddl2 v5.2d, v0.4s, v1.4s +; CHECK-NEXT: saddl v2.2d, v3.2s, v2.2s +; CHECK-NEXT: saddl v0.2d, v0.2s, v1.2s +; CHECK-NEXT: add v1.2d, v5.2d, v4.2d +; CHECK-NEXT: add v0.2d, v0.2d, v2.2d +; CHECK-NEXT: add v0.2d, v0.2d, v1.2d +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: add x0, x8, x0 +; CHECK-NEXT: ret +entry: + %xx = sext <16 x i8> %x to <16 x i64> + %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx) + %r = add i64 %z, %a + ret i64 %r +} + +define i64 @add_v8i8_v8i64_acc_zext(<8 x i8> %x, i64 %a) { +; CHECK-LABEL: add_v8i8_v8i64_acc_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: uaddl2 v2.2d, v0.4s, v1.4s +; CHECK-NEXT: uaddl v0.2d, v0.2s, v1.2s +; CHECK-NEXT: add v0.2d, v0.2d, v2.2d +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: add x0, x8, x0 +; CHECK-NEXT: ret +entry: + %xx = zext <8 x i8> %x to <8 x i64> + %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx) + %r = add i64 %z, %a + ret i64 %r +} + +define i64 @add_v8i8_v8i64_acc_sext(<8 x i8> %x, i64 %a) { +; CHECK-LABEL: add_v8i8_v8i64_acc_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-NEXT: sshll2 v1.4s, v0.8h, #0 +; CHECK-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-NEXT: saddl2 v2.2d, v0.4s, v1.4s +; CHECK-NEXT: saddl v0.2d, v0.2s, v1.2s +; CHECK-NEXT: add v0.2d, v0.2d, v2.2d +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: add x0, x8, x0 +; CHECK-NEXT: ret +entry: + %xx = sext <8 x i8> %x to <8 x i64> + %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx) + %r = add i64 %z, %a + ret i64 %r +} + +define i64 @add_v4i8_v4i64_acc_zext(<4 x i8> %x, i64 %a) { +; CHECK-LABEL: add_v4i8_v4i64_acc_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: bic v0.4h, #255, lsl #8 +; 
CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: ushll v1.2d, v0.2s, #0 +; CHECK-NEXT: uaddw2 v0.2d, v1.2d, v0.4s +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: add x0, x8, x0 +; CHECK-NEXT: ret +entry: + %xx = zext <4 x i8> %x to <4 x i64> + %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx) + %r = add i64 %z, %a + ret i64 %r +} + +define i64 @add_v4i8_v4i64_acc_sext(<4 x i8> %x, i64 %a) { +; CHECK-LABEL: add_v4i8_v4i64_acc_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: ushll v1.2d, v0.2s, #0 +; CHECK-NEXT: ushll2 v0.2d, v0.4s, #0 +; CHECK-NEXT: shl v1.2d, v1.2d, #56 +; CHECK-NEXT: shl v0.2d, v0.2d, #56 +; CHECK-NEXT: sshr v1.2d, v1.2d, #56 +; CHECK-NEXT: ssra v1.2d, v0.2d, #56 +; CHECK-NEXT: addp d0, v1.2d +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: add x0, x8, x0 +; CHECK-NEXT: ret +entry: + %xx = sext <4 x i8> %x to <4 x i64> + %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx) + %r = add i64 %z, %a + ret i64 %r +} + +define i64 @add_v2i8_v2i64_acc_zext(<2 x i8> %x, i64 %a) { +; CHECK-LABEL: add_v2i8_v2i64_acc_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: movi d1, #0x0000ff000000ff +; CHECK-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: add x0, x8, x0 +; CHECK-NEXT: ret +entry: + %xx = zext <2 x i8> %x to <2 x i64> + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx) + %r = add i64 %z, %a + ret i64 %r +} + +define i64 @add_v2i8_v2i64_acc_sext(<2 x i8> %x, i64 %a) { +; CHECK-LABEL: add_v2i8_v2i64_acc_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-NEXT: shl v0.2d, v0.2d, #56 +; CHECK-NEXT: sshr v0.2d, v0.2d, #56 +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: add x0, x8, x0 +; CHECK-NEXT: ret +entry: + %xx = sext <2 x i8> %x to <2 x i64> + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx) + %r = add i64 %z, %a 
+ ret i64 %r +} + +define i64 @add_v2i64_v2i64_acc(<2 x i64> %x, i64 %a) { +; CHECK-LABEL: add_v2i64_v2i64_acc: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: add x0, x8, x0 +; CHECK-NEXT: ret +entry: + %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %x) + %r = add i64 %z, %a + ret i64 %r +} + +define i32 @add_pair_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: add_pair_v4i32_v4i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +entry: + %z1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %x) + %z2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %y) + %z = add i32 %z1, %z2 + ret i32 %z +} + +define i64 @add_pair_v4i32_v4i64_zext(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: add_pair_v4i32_v4i64_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v2.2d, v0.2s, #0 +; CHECK-NEXT: ushll v3.2d, v1.2s, #0 +; CHECK-NEXT: uaddw2 v0.2d, v2.2d, v0.4s +; CHECK-NEXT: uaddw2 v1.2d, v3.2d, v1.4s +; CHECK-NEXT: add v0.2d, v0.2d, v1.2d +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %xx = zext <4 x i32> %x to <4 x i64> + %z1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx) + %yy = zext <4 x i32> %y to <4 x i64> + %z2 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %yy) + %z = add i64 %z1, %z2 + ret i64 %z +} + +define i64 @add_pair_v4i32_v4i64_sext(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: add_pair_v4i32_v4i64_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sshll v2.2d, v0.2s, #0 +; CHECK-NEXT: sshll v3.2d, v1.2s, #0 +; CHECK-NEXT: saddw2 v0.2d, v2.2d, v0.4s +; CHECK-NEXT: saddw2 v1.2d, v3.2d, v1.4s +; CHECK-NEXT: add v0.2d, v0.2d, v1.2d +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %xx = sext <4 x i32> %x to <4 x i64> + %z1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx) + %yy = sext <4 x i32> %y to 
<4 x i64> + %z2 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %yy) + %z = add i64 %z1, %z2 + ret i64 %z +} + +define i64 @add_pair_v2i32_v2i64_zext(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: add_pair_v2i32_v2i64_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: uaddl v0.2d, v0.2s, v1.2s +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %xx = zext <2 x i32> %x to <2 x i64> + %z1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx) + %yy = zext <2 x i32> %y to <2 x i64> + %z2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %yy) + %z = add i64 %z1, %z2 + ret i64 %z +} + +define i64 @add_pair_v2i32_v2i64_sext(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: add_pair_v2i32_v2i64_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: saddl v0.2d, v0.2s, v1.2s +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %xx = sext <2 x i32> %x to <2 x i64> + %z1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx) + %yy = sext <2 x i32> %y to <2 x i64> + %z2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %yy) + %z = add i64 %z1, %z2 + ret i64 %z +} + +define i32 @add_pair_v8i16_v8i32_zext(<8 x i16> %x, <8 x i16> %y) { +; CHECK-LABEL: add_pair_v8i16_v8i32_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v2.4s, v0.4h, #0 +; CHECK-NEXT: ushll v3.4s, v1.4h, #0 +; CHECK-NEXT: uaddw2 v0.4s, v2.4s, v0.8h +; CHECK-NEXT: uaddw2 v1.4s, v3.4s, v1.8h +; CHECK-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +entry: + %xx = zext <8 x i16> %x to <8 x i32> + %z1 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx) + %yy = zext <8 x i16> %y to <8 x i32> + %z2 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %yy) + %z = add i32 %z1, %z2 + ret i32 %z +} + +define i32 @add_pair_v8i16_v8i32_sext(<8 x i16> %x, <8 x i16> %y) { +; CHECK-LABEL: add_pair_v8i16_v8i32_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sshll v2.4s, v0.4h, #0 +; CHECK-NEXT: 
sshll v3.4s, v1.4h, #0 +; CHECK-NEXT: saddw2 v0.4s, v2.4s, v0.8h +; CHECK-NEXT: saddw2 v1.4s, v3.4s, v1.8h +; CHECK-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +entry: + %xx = sext <8 x i16> %x to <8 x i32> + %z1 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx) + %yy = sext <8 x i16> %y to <8 x i32> + %z2 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %yy) + %z = add i32 %z1, %z2 + ret i32 %z +} + +define i32 @add_pair_v4i16_v4i32_zext(<4 x i16> %x, <4 x i16> %y) { +; CHECK-LABEL: add_pair_v4i16_v4i32_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: uaddl v0.4s, v0.4h, v1.4h +; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +entry: + %xx = zext <4 x i16> %x to <4 x i32> + %z1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx) + %yy = zext <4 x i16> %y to <4 x i32> + %z2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %yy) + %z = add i32 %z1, %z2 + ret i32 %z +} + +define i32 @add_pair_v4i16_v4i32_sext(<4 x i16> %x, <4 x i16> %y) { +; CHECK-LABEL: add_pair_v4i16_v4i32_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: saddl v0.4s, v0.4h, v1.4h +; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +entry: + %xx = sext <4 x i16> %x to <4 x i32> + %z1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx) + %yy = sext <4 x i16> %y to <4 x i32> + %z2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %yy) + %z = add i32 %z1, %z2 + ret i32 %z +} + +define zeroext i16 @add_pair_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) { +; CHECK-LABEL: add_pair_v8i16_v8i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: addv h0, v0.8h +; CHECK-NEXT: addv h1, v1.8h +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: fmov w9, s1 +; CHECK-NEXT: add w8, w8, w9 +; CHECK-NEXT: and w0, w8, #0xffff +; CHECK-NEXT: ret +entry: + %z1 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %x) + %z2 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %y) + %z = add i16 %z1, %z2 + ret i16 
%z +} + +define i64 @add_pair_v8i16_v8i64_zext(<8 x i16> %x, <8 x i16> %y) { +; CHECK-LABEL: add_pair_v8i16_v8i64_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll2 v2.4s, v0.8h, #0 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: ushll2 v3.4s, v1.8h, #0 +; CHECK-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-NEXT: uaddl2 v4.2d, v0.4s, v2.4s +; CHECK-NEXT: uaddl v0.2d, v0.2s, v2.2s +; CHECK-NEXT: uaddl2 v2.2d, v1.4s, v3.4s +; CHECK-NEXT: uaddl v1.2d, v1.2s, v3.2s +; CHECK-NEXT: add v0.2d, v0.2d, v4.2d +; CHECK-NEXT: add v1.2d, v1.2d, v2.2d +; CHECK-NEXT: add v0.2d, v0.2d, v1.2d +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %xx = zext <8 x i16> %x to <8 x i64> + %z1 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx) + %yy = zext <8 x i16> %y to <8 x i64> + %z2 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %yy) + %z = add i64 %z1, %z2 + ret i64 %z +} + +define i64 @add_pair_v8i16_v8i64_sext(<8 x i16> %x, <8 x i16> %y) { +; CHECK-LABEL: add_pair_v8i16_v8i64_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sshll2 v2.4s, v0.8h, #0 +; CHECK-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-NEXT: sshll2 v3.4s, v1.8h, #0 +; CHECK-NEXT: sshll v1.4s, v1.4h, #0 +; CHECK-NEXT: saddl2 v4.2d, v0.4s, v2.4s +; CHECK-NEXT: saddl v0.2d, v0.2s, v2.2s +; CHECK-NEXT: saddl2 v2.2d, v1.4s, v3.4s +; CHECK-NEXT: saddl v1.2d, v1.2s, v3.2s +; CHECK-NEXT: add v0.2d, v0.2d, v4.2d +; CHECK-NEXT: add v1.2d, v1.2d, v2.2d +; CHECK-NEXT: add v0.2d, v0.2d, v1.2d +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %xx = sext <8 x i16> %x to <8 x i64> + %z1 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx) + %yy = sext <8 x i16> %y to <8 x i64> + %z2 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %yy) + %z = add i64 %z1, %z2 + ret i64 %z +} + +define i64 @add_pair_v4i16_v4i64_zext(<4 x i16> %x, <4 x i16> %y) { +; CHECK-LABEL: add_pair_v4i16_v4i64_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v0.4s, v0.4h, 
#0 +; CHECK-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-NEXT: ushll v2.2d, v0.2s, #0 +; CHECK-NEXT: ushll v3.2d, v1.2s, #0 +; CHECK-NEXT: uaddw2 v0.2d, v2.2d, v0.4s +; CHECK-NEXT: uaddw2 v1.2d, v3.2d, v1.4s +; CHECK-NEXT: add v0.2d, v0.2d, v1.2d +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %xx = zext <4 x i16> %x to <4 x i64> + %z1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx) + %yy = zext <4 x i16> %y to <4 x i64> + %z2 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %yy) + %z = add i64 %z1, %z2 + ret i64 %z +} + +define i64 @add_pair_v4i16_v4i64_sext(<4 x i16> %x, <4 x i16> %y) { +; CHECK-LABEL: add_pair_v4i16_v4i64_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-NEXT: sshll v1.4s, v1.4h, #0 +; CHECK-NEXT: sshll v2.2d, v0.2s, #0 +; CHECK-NEXT: sshll v3.2d, v1.2s, #0 +; CHECK-NEXT: saddw2 v0.2d, v2.2d, v0.4s +; CHECK-NEXT: saddw2 v1.2d, v3.2d, v1.4s +; CHECK-NEXT: add v0.2d, v0.2d, v1.2d +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %xx = sext <4 x i16> %x to <4 x i64> + %z1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx) + %yy = sext <4 x i16> %y to <4 x i64> + %z2 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %yy) + %z = add i64 %z1, %z2 + ret i64 %z +} + +define i64 @add_pair_v2i16_v2i64_zext(<2 x i16> %x, <2 x i16> %y) { +; CHECK-LABEL: add_pair_v2i16_v2i64_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: movi d2, #0x00ffff0000ffff +; CHECK-NEXT: and v0.8b, v0.8b, v2.8b +; CHECK-NEXT: and v1.8b, v1.8b, v2.8b +; CHECK-NEXT: uaddl v0.2d, v0.2s, v1.2s +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %xx = zext <2 x i16> %x to <2 x i64> + %z1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx) + %yy = zext <2 x i16> %y to <2 x i64> + %z2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %yy) + %z = add i64 %z1, %z2 + ret i64 %z +} + +define i64 @add_pair_v2i16_v2i64_sext(<2 x i16> %x, <2 
x i16> %y) { +; CHECK-LABEL: add_pair_v2i16_v2i64_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-NEXT: ushll v1.2d, v1.2s, #0 +; CHECK-NEXT: shl v0.2d, v0.2d, #48 +; CHECK-NEXT: shl v1.2d, v1.2d, #48 +; CHECK-NEXT: sshr v0.2d, v0.2d, #48 +; CHECK-NEXT: ssra v0.2d, v1.2d, #48 +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %xx = sext <2 x i16> %x to <2 x i64> + %z1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx) + %yy = sext <2 x i16> %y to <2 x i64> + %z2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %yy) + %z = add i64 %z1, %z2 + ret i64 %z +} + +define i32 @add_pair_v16i8_v16i32_zext(<16 x i8> %x, <16 x i8> %y) { +; CHECK-BASE-LABEL: add_pair_v16i8_v16i32_zext: +; CHECK-BASE: // %bb.0: // %entry +; CHECK-BASE-NEXT: ushll2 v2.8h, v0.16b, #0 +; CHECK-BASE-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-BASE-NEXT: ushll2 v3.8h, v1.16b, #0 +; CHECK-BASE-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-BASE-NEXT: uaddl2 v4.4s, v0.8h, v2.8h +; CHECK-BASE-NEXT: uaddl v0.4s, v0.4h, v2.4h +; CHECK-BASE-NEXT: uaddl2 v2.4s, v1.8h, v3.8h +; CHECK-BASE-NEXT: uaddl v1.4s, v1.4h, v3.4h +; CHECK-BASE-NEXT: add v0.4s, v0.4s, v4.4s +; CHECK-BASE-NEXT: add v1.4s, v1.4s, v2.4s +; CHECK-BASE-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-BASE-NEXT: addv s0, v0.4s +; CHECK-BASE-NEXT: fmov w0, s0 +; CHECK-BASE-NEXT: ret +; +; CHECK-DOT-LABEL: add_pair_v16i8_v16i32_zext: +; CHECK-DOT: // %bb.0: // %entry +; CHECK-DOT-NEXT: movi v2.16b, #1 +; CHECK-DOT-NEXT: movi v3.2d, #0000000000000000 +; CHECK-DOT-NEXT: udot v3.4s, v1.16b, v2.16b +; CHECK-DOT-NEXT: udot v3.4s, v0.16b, v2.16b +; CHECK-DOT-NEXT: addv s0, v3.4s +; CHECK-DOT-NEXT: fmov w0, s0 +; CHECK-DOT-NEXT: ret +entry: + %xx = zext <16 x i8> %x to <16 x i32> + %z1 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx) + %yy = zext <16 x i8> %y to <16 x i32> + %z2 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %yy) + %z = add i32 %z1, %z2 + ret i32 %z +} + +define i32 
@add_pair_v16i8_v16i32_sext(<16 x i8> %x, <16 x i8> %y) { +; CHECK-BASE-LABEL: add_pair_v16i8_v16i32_sext: +; CHECK-BASE: // %bb.0: // %entry +; CHECK-BASE-NEXT: sshll2 v2.8h, v0.16b, #0 +; CHECK-BASE-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-BASE-NEXT: sshll2 v3.8h, v1.16b, #0 +; CHECK-BASE-NEXT: sshll v1.8h, v1.8b, #0 +; CHECK-BASE-NEXT: saddl2 v4.4s, v0.8h, v2.8h +; CHECK-BASE-NEXT: saddl v0.4s, v0.4h, v2.4h +; CHECK-BASE-NEXT: saddl2 v2.4s, v1.8h, v3.8h +; CHECK-BASE-NEXT: saddl v1.4s, v1.4h, v3.4h +; CHECK-BASE-NEXT: add v0.4s, v0.4s, v4.4s +; CHECK-BASE-NEXT: add v1.4s, v1.4s, v2.4s +; CHECK-BASE-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-BASE-NEXT: addv s0, v0.4s +; CHECK-BASE-NEXT: fmov w0, s0 +; CHECK-BASE-NEXT: ret +; +; CHECK-DOT-LABEL: add_pair_v16i8_v16i32_sext: +; CHECK-DOT: // %bb.0: // %entry +; CHECK-DOT-NEXT: movi v2.16b, #1 +; CHECK-DOT-NEXT: movi v3.2d, #0000000000000000 +; CHECK-DOT-NEXT: sdot v3.4s, v1.16b, v2.16b +; CHECK-DOT-NEXT: sdot v3.4s, v0.16b, v2.16b +; CHECK-DOT-NEXT: addv s0, v3.4s +; CHECK-DOT-NEXT: fmov w0, s0 +; CHECK-DOT-NEXT: ret +entry: + %xx = sext <16 x i8> %x to <16 x i32> + %z1 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx) + %yy = sext <16 x i8> %y to <16 x i32> + %z2 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %yy) + %z = add i32 %z1, %z2 + ret i32 %z +} + +define i32 @add_pair_v8i8_v8i32_zext(<8 x i8> %x, <8 x i8> %y) { +; CHECK-BASE-LABEL: add_pair_v8i8_v8i32_zext: +; CHECK-BASE: // %bb.0: // %entry +; CHECK-BASE-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-BASE-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-BASE-NEXT: ushll v2.4s, v0.4h, #0 +; CHECK-BASE-NEXT: ushll v3.4s, v1.4h, #0 +; CHECK-BASE-NEXT: uaddw2 v0.4s, v2.4s, v0.8h +; CHECK-BASE-NEXT: uaddw2 v1.4s, v3.4s, v1.8h +; CHECK-BASE-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-BASE-NEXT: addv s0, v0.4s +; CHECK-BASE-NEXT: fmov w0, s0 +; CHECK-BASE-NEXT: ret +; +; CHECK-DOT-LABEL: add_pair_v8i8_v8i32_zext: +; CHECK-DOT: // %bb.0: // %entry +; CHECK-DOT-NEXT: movi v2.8b, #1 
+; CHECK-DOT-NEXT: movi v3.2d, #0000000000000000 +; CHECK-DOT-NEXT: udot v3.2s, v1.8b, v2.8b +; CHECK-DOT-NEXT: udot v3.2s, v0.8b, v2.8b +; CHECK-DOT-NEXT: addp v0.2s, v3.2s, v3.2s +; CHECK-DOT-NEXT: fmov w0, s0 +; CHECK-DOT-NEXT: ret +entry: + %xx = zext <8 x i8> %x to <8 x i32> + %z1 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx) + %yy = zext <8 x i8> %y to <8 x i32> + %z2 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %yy) + %z = add i32 %z1, %z2 + ret i32 %z +} + +define i32 @add_pair_v8i8_v8i32_sext(<8 x i8> %x, <8 x i8> %y) { +; CHECK-BASE-LABEL: add_pair_v8i8_v8i32_sext: +; CHECK-BASE: // %bb.0: // %entry +; CHECK-BASE-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-BASE-NEXT: sshll v1.8h, v1.8b, #0 +; CHECK-BASE-NEXT: sshll v2.4s, v0.4h, #0 +; CHECK-BASE-NEXT: sshll v3.4s, v1.4h, #0 +; CHECK-BASE-NEXT: saddw2 v0.4s, v2.4s, v0.8h +; CHECK-BASE-NEXT: saddw2 v1.4s, v3.4s, v1.8h +; CHECK-BASE-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-BASE-NEXT: addv s0, v0.4s +; CHECK-BASE-NEXT: fmov w0, s0 +; CHECK-BASE-NEXT: ret +; +; CHECK-DOT-LABEL: add_pair_v8i8_v8i32_sext: +; CHECK-DOT: // %bb.0: // %entry +; CHECK-DOT-NEXT: movi v2.8b, #1 +; CHECK-DOT-NEXT: movi v3.2d, #0000000000000000 +; CHECK-DOT-NEXT: sdot v3.2s, v1.8b, v2.8b +; CHECK-DOT-NEXT: sdot v3.2s, v0.8b, v2.8b +; CHECK-DOT-NEXT: addp v0.2s, v3.2s, v3.2s +; CHECK-DOT-NEXT: fmov w0, s0 +; CHECK-DOT-NEXT: ret +entry: + %xx = sext <8 x i8> %x to <8 x i32> + %z1 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx) + %yy = sext <8 x i8> %y to <8 x i32> + %z2 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %yy) + %z = add i32 %z1, %z2 + ret i32 %z +} + +define i32 @add_pair_v4i8_v4i32_zext(<4 x i8> %x, <4 x i8> %y) { +; CHECK-LABEL: add_pair_v4i8_v4i32_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: bic v0.4h, #255, lsl #8 +; CHECK-NEXT: bic v1.4h, #255, lsl #8 +; CHECK-NEXT: uaddl v0.4s, v0.4h, v1.4h +; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +entry: + %xx = zext <4 x i8> 
%x to <4 x i32> + %z1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx) + %yy = zext <4 x i8> %y to <4 x i32> + %z2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %yy) + %z = add i32 %z1, %z2 + ret i32 %z +} + +define i32 @add_pair_v4i8_v4i32_sext(<4 x i8> %x, <4 x i8> %y) { +; CHECK-LABEL: add_pair_v4i8_v4i32_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-NEXT: shl v0.4s, v0.4s, #24 +; CHECK-NEXT: shl v1.4s, v1.4s, #24 +; CHECK-NEXT: sshr v0.4s, v0.4s, #24 +; CHECK-NEXT: ssra v0.4s, v1.4s, #24 +; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +entry: + %xx = sext <4 x i8> %x to <4 x i32> + %z1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx) + %yy = sext <4 x i8> %y to <4 x i32> + %z2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %yy) + %z = add i32 %z1, %z2 + ret i32 %z +} + +define zeroext i16 @add_pair_v16i8_v16i16_zext(<16 x i8> %x, <16 x i8> %y) { +; CHECK-LABEL: add_pair_v16i8_v16i16_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v2.8h, v0.8b, #0 +; CHECK-NEXT: ushll v3.8h, v1.8b, #0 +; CHECK-NEXT: uaddw2 v0.8h, v2.8h, v0.16b +; CHECK-NEXT: uaddw2 v1.8h, v3.8h, v1.16b +; CHECK-NEXT: addv h0, v0.8h +; CHECK-NEXT: addv h1, v1.8h +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: fmov w9, s1 +; CHECK-NEXT: add w8, w8, w9 +; CHECK-NEXT: and w0, w8, #0xffff +; CHECK-NEXT: ret +entry: + %xx = zext <16 x i8> %x to <16 x i16> + %z1 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx) + %yy = zext <16 x i8> %y to <16 x i16> + %z2 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %yy) + %z = add i16 %z1, %z2 + ret i16 %z +} + +define signext i16 @add_pair_v16i8_v16i16_sext(<16 x i8> %x, <16 x i8> %y) { +; CHECK-LABEL: add_pair_v16i8_v16i16_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sshll v2.8h, v0.8b, #0 +; CHECK-NEXT: sshll v3.8h, v1.8b, #0 +; CHECK-NEXT: saddw2 v0.8h, v2.8h, v0.16b +; CHECK-NEXT: saddw2 v1.8h, v3.8h, v1.16b +; 
CHECK-NEXT: addv h0, v0.8h +; CHECK-NEXT: addv h1, v1.8h +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: fmov w9, s1 +; CHECK-NEXT: add w8, w8, w9 +; CHECK-NEXT: sxth w0, w8 +; CHECK-NEXT: ret +entry: + %xx = sext <16 x i8> %x to <16 x i16> + %z1 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx) + %yy = sext <16 x i8> %y to <16 x i16> + %z2 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %yy) + %z = add i16 %z1, %z2 + ret i16 %z +} + +define zeroext i16 @add_pair_v8i8_v8i16_zext(<8 x i8> %x, <8 x i8> %y) { +; CHECK-LABEL: add_pair_v8i8_v8i16_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-NEXT: addv h0, v0.8h +; CHECK-NEXT: addv h1, v1.8h +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: fmov w9, s1 +; CHECK-NEXT: add w8, w8, w9 +; CHECK-NEXT: and w0, w8, #0xffff +; CHECK-NEXT: ret +entry: + %xx = zext <8 x i8> %x to <8 x i16> + %z1 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx) + %yy = zext <8 x i8> %y to <8 x i16> + %z2 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %yy) + %z = add i16 %z1, %z2 + ret i16 %z +} + +define signext i16 @add_pair_v8i8_v8i16_sext(<8 x i8> %x, <8 x i8> %y) { +; CHECK-LABEL: add_pair_v8i8_v8i16_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-NEXT: sshll v1.8h, v1.8b, #0 +; CHECK-NEXT: addv h0, v0.8h +; CHECK-NEXT: addv h1, v1.8h +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: fmov w9, s1 +; CHECK-NEXT: add w8, w8, w9 +; CHECK-NEXT: sxth w0, w8 +; CHECK-NEXT: ret +entry: + %xx = sext <8 x i8> %x to <8 x i16> + %z1 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx) + %yy = sext <8 x i8> %y to <8 x i16> + %z2 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %yy) + %z = add i16 %z1, %z2 + ret i16 %z +} + +define zeroext i8 @add_pair_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) { +; CHECK-LABEL: add_pair_v16i8_v16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: addv b0, v0.16b +; CHECK-NEXT: addv b1, v1.16b +; CHECK-NEXT: fmov 
w8, s0 +; CHECK-NEXT: fmov w9, s1 +; CHECK-NEXT: add w8, w8, w9 +; CHECK-NEXT: and w0, w8, #0xff +; CHECK-NEXT: ret +entry: + %z1 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %x) + %z2 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %y) + %z = add i8 %z1, %z2 + ret i8 %z +} + +define i64 @add_pair_v16i8_v16i64_zext(<16 x i8> %x, <16 x i8> %y) { +; CHECK-LABEL: add_pair_v16i8_v16i64_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll2 v2.8h, v0.16b, #0 +; CHECK-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-NEXT: ushll2 v3.8h, v1.16b, #0 +; CHECK-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-NEXT: ushll2 v4.4s, v2.8h, #0 +; CHECK-NEXT: ushll v2.4s, v2.4h, #0 +; CHECK-NEXT: ushll2 v5.4s, v0.8h, #0 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: ushll v6.4s, v3.4h, #0 +; CHECK-NEXT: ushll v7.4s, v1.4h, #0 +; CHECK-NEXT: ushll2 v3.4s, v3.8h, #0 +; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0 +; CHECK-NEXT: uaddl2 v16.2d, v5.4s, v4.4s +; CHECK-NEXT: uaddl v4.2d, v5.2s, v4.2s +; CHECK-NEXT: uaddl2 v5.2d, v0.4s, v2.4s +; CHECK-NEXT: uaddl v0.2d, v0.2s, v2.2s +; CHECK-NEXT: uaddl2 v2.2d, v1.4s, v3.4s +; CHECK-NEXT: uaddl v1.2d, v1.2s, v3.2s +; CHECK-NEXT: uaddl2 v3.2d, v7.4s, v6.4s +; CHECK-NEXT: uaddl v6.2d, v7.2s, v6.2s +; CHECK-NEXT: add v5.2d, v5.2d, v16.2d +; CHECK-NEXT: add v0.2d, v0.2d, v4.2d +; CHECK-NEXT: add v2.2d, v3.2d, v2.2d +; CHECK-NEXT: add v1.2d, v6.2d, v1.2d +; CHECK-NEXT: add v0.2d, v0.2d, v5.2d +; CHECK-NEXT: add v1.2d, v1.2d, v2.2d +; CHECK-NEXT: add v0.2d, v0.2d, v1.2d +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %xx = zext <16 x i8> %x to <16 x i64> + %z1 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx) + %yy = zext <16 x i8> %y to <16 x i64> + %z2 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %yy) + %z = add i64 %z1, %z2 + ret i64 %z +} + +define i64 @add_pair_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y) { +; CHECK-LABEL: add_pair_v16i8_v16i64_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sshll2 
v2.8h, v0.16b, #0 +; CHECK-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-NEXT: sshll2 v3.8h, v1.16b, #0 +; CHECK-NEXT: sshll v1.8h, v1.8b, #0 +; CHECK-NEXT: sshll2 v4.4s, v2.8h, #0 +; CHECK-NEXT: sshll v2.4s, v2.4h, #0 +; CHECK-NEXT: sshll2 v5.4s, v0.8h, #0 +; CHECK-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-NEXT: sshll v6.4s, v3.4h, #0 +; CHECK-NEXT: sshll v7.4s, v1.4h, #0 +; CHECK-NEXT: sshll2 v3.4s, v3.8h, #0 +; CHECK-NEXT: sshll2 v1.4s, v1.8h, #0 +; CHECK-NEXT: saddl2 v16.2d, v5.4s, v4.4s +; CHECK-NEXT: saddl v4.2d, v5.2s, v4.2s +; CHECK-NEXT: saddl2 v5.2d, v0.4s, v2.4s +; CHECK-NEXT: saddl v0.2d, v0.2s, v2.2s +; CHECK-NEXT: saddl2 v2.2d, v1.4s, v3.4s +; CHECK-NEXT: saddl v1.2d, v1.2s, v3.2s +; CHECK-NEXT: saddl2 v3.2d, v7.4s, v6.4s +; CHECK-NEXT: saddl v6.2d, v7.2s, v6.2s +; CHECK-NEXT: add v5.2d, v5.2d, v16.2d +; CHECK-NEXT: add v0.2d, v0.2d, v4.2d +; CHECK-NEXT: add v2.2d, v3.2d, v2.2d +; CHECK-NEXT: add v1.2d, v6.2d, v1.2d +; CHECK-NEXT: add v0.2d, v0.2d, v5.2d +; CHECK-NEXT: add v1.2d, v1.2d, v2.2d +; CHECK-NEXT: add v0.2d, v0.2d, v1.2d +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %xx = sext <16 x i8> %x to <16 x i64> + %z1 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx) + %yy = sext <16 x i8> %y to <16 x i64> + %z2 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %yy) + %z = add i64 %z1, %z2 + ret i64 %z +} + +define i64 @add_pair_v8i8_v8i64_zext(<8 x i8> %x, <8 x i8> %y) { +; CHECK-LABEL: add_pair_v8i8_v8i64_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-NEXT: ushll2 v2.4s, v0.8h, #0 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: ushll2 v3.4s, v1.8h, #0 +; CHECK-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-NEXT: uaddl2 v4.2d, v0.4s, v2.4s +; CHECK-NEXT: uaddl v0.2d, v0.2s, v2.2s +; CHECK-NEXT: uaddl2 v2.2d, v1.4s, v3.4s +; CHECK-NEXT: uaddl v1.2d, v1.2s, v3.2s +; CHECK-NEXT: add v0.2d, v0.2d, v4.2d +; CHECK-NEXT: add v1.2d, v1.2d, v2.2d +; 
CHECK-NEXT: add v0.2d, v0.2d, v1.2d +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %xx = zext <8 x i8> %x to <8 x i64> + %z1 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx) + %yy = zext <8 x i8> %y to <8 x i64> + %z2 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %yy) + %z = add i64 %z1, %z2 + ret i64 %z +} + +define i64 @add_pair_v8i8_v8i64_sext(<8 x i8> %x, <8 x i8> %y) { +; CHECK-LABEL: add_pair_v8i8_v8i64_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-NEXT: sshll v1.8h, v1.8b, #0 +; CHECK-NEXT: sshll2 v2.4s, v0.8h, #0 +; CHECK-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-NEXT: sshll2 v3.4s, v1.8h, #0 +; CHECK-NEXT: sshll v1.4s, v1.4h, #0 +; CHECK-NEXT: saddl2 v4.2d, v0.4s, v2.4s +; CHECK-NEXT: saddl v0.2d, v0.2s, v2.2s +; CHECK-NEXT: saddl2 v2.2d, v1.4s, v3.4s +; CHECK-NEXT: saddl v1.2d, v1.2s, v3.2s +; CHECK-NEXT: add v0.2d, v0.2d, v4.2d +; CHECK-NEXT: add v1.2d, v1.2d, v2.2d +; CHECK-NEXT: add v0.2d, v0.2d, v1.2d +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %xx = sext <8 x i8> %x to <8 x i64> + %z1 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx) + %yy = sext <8 x i8> %y to <8 x i64> + %z2 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %yy) + %z = add i64 %z1, %z2 + ret i64 %z +} + +define i64 @add_pair_v4i8_v4i64_zext(<4 x i8> %x, <4 x i8> %y) { +; CHECK-LABEL: add_pair_v4i8_v4i64_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: bic v0.4h, #255, lsl #8 +; CHECK-NEXT: bic v1.4h, #255, lsl #8 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-NEXT: ushll v2.2d, v0.2s, #0 +; CHECK-NEXT: ushll v3.2d, v1.2s, #0 +; CHECK-NEXT: uaddw2 v0.2d, v2.2d, v0.4s +; CHECK-NEXT: uaddw2 v1.2d, v3.2d, v1.4s +; CHECK-NEXT: add v0.2d, v0.2d, v1.2d +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %xx = zext <4 x i8> %x to <4 x i64> + %z1 = call i64 
@llvm.vector.reduce.add.v4i64(<4 x i64> %xx) + %yy = zext <4 x i8> %y to <4 x i64> + %z2 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %yy) + %z = add i64 %z1, %z2 + ret i64 %z +} + +define i64 @add_pair_v4i8_v4i64_sext(<4 x i8> %x, <4 x i8> %y) { +; CHECK-LABEL: add_pair_v4i8_v4i64_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-NEXT: ushll v2.2d, v0.2s, #0 +; CHECK-NEXT: ushll v3.2d, v1.2s, #0 +; CHECK-NEXT: ushll2 v0.2d, v0.4s, #0 +; CHECK-NEXT: ushll2 v1.2d, v1.4s, #0 +; CHECK-NEXT: shl v2.2d, v2.2d, #56 +; CHECK-NEXT: shl v3.2d, v3.2d, #56 +; CHECK-NEXT: shl v0.2d, v0.2d, #56 +; CHECK-NEXT: shl v1.2d, v1.2d, #56 +; CHECK-NEXT: sshr v2.2d, v2.2d, #56 +; CHECK-NEXT: sshr v3.2d, v3.2d, #56 +; CHECK-NEXT: ssra v2.2d, v0.2d, #56 +; CHECK-NEXT: ssra v3.2d, v1.2d, #56 +; CHECK-NEXT: add v0.2d, v2.2d, v3.2d +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %xx = sext <4 x i8> %x to <4 x i64> + %z1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx) + %yy = sext <4 x i8> %y to <4 x i64> + %z2 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %yy) + %z = add i64 %z1, %z2 + ret i64 %z +} + +define i64 @add_pair_v2i8_v2i64_zext(<2 x i8> %x, <2 x i8> %y) { +; CHECK-LABEL: add_pair_v2i8_v2i64_zext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: movi d2, #0x0000ff000000ff +; CHECK-NEXT: and v0.8b, v0.8b, v2.8b +; CHECK-NEXT: and v1.8b, v1.8b, v2.8b +; CHECK-NEXT: uaddl v0.2d, v0.2s, v1.2s +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %xx = zext <2 x i8> %x to <2 x i64> + %z1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx) + %yy = zext <2 x i8> %y to <2 x i64> + %z2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %yy) + %z = add i64 %z1, %z2 + ret i64 %z +} + +define i64 @add_pair_v2i8_v2i64_sext(<2 x i8> %x, <2 x i8> %y) { +; CHECK-LABEL: add_pair_v2i8_v2i64_sext: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: 
ushll v0.2d, v0.2s, #0 +; CHECK-NEXT: ushll v1.2d, v1.2s, #0 +; CHECK-NEXT: shl v0.2d, v0.2d, #56 +; CHECK-NEXT: shl v1.2d, v1.2d, #56 +; CHECK-NEXT: sshr v0.2d, v0.2d, #56 +; CHECK-NEXT: ssra v0.2d, v1.2d, #56 +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %xx = sext <2 x i8> %x to <2 x i64> + %z1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx) + %yy = sext <2 x i8> %y to <2 x i64> + %z2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %yy) + %z = add i64 %z1, %z2 + ret i64 %z +} + +define i64 @add_pair_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) { +; CHECK-LABEL: add_pair_v2i64_v2i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: add v0.2d, v0.2d, v1.2d +; CHECK-NEXT: addp d0, v0.2d +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret +entry: + %z1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %x) + %z2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %y) + %z = add i64 %z1, %z2 + ret i64 %z +} + +declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) +declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>) +declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>) +declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>) +declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>) +declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>) +declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>) diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s 
--check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32-V +; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,ZVE64X +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64-V +; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZVE64X define @vdiv_vv_nxv1i8( %va, %vb) { ; CHECK-LABEL: vdiv_vv_nxv1i8: @@ -895,38 +897,45 @@ } define @vdiv_vi_nxv1i64_0( %va) { -; RV32-LABEL: vdiv_vi_nxv1i64_0: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: lui a0, 748983 -; RV32-NEXT: addi a0, a0, -586 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: lui a0, 898779 -; RV32-NEXT: addi a0, a0, 1755 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vmulh.vv v8, v8, v9 -; RV32-NEXT: li a0, 63 -; RV32-NEXT: vsrl.vx v9, v8, a0 -; RV32-NEXT: vsra.vi v8, v8, 1 -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret +; RV32-V-LABEL: vdiv_vi_nxv1i64_0: +; RV32-V: # %bb.0: +; RV32-V-NEXT: addi sp, sp, -16 +; RV32-V-NEXT: .cfi_def_cfa_offset 16 +; RV32-V-NEXT: lui a0, 748983 +; RV32-V-NEXT: addi a0, a0, -586 +; RV32-V-NEXT: sw a0, 12(sp) +; RV32-V-NEXT: lui a0, 898779 +; RV32-V-NEXT: addi a0, a0, 1755 +; RV32-V-NEXT: sw a0, 8(sp) +; RV32-V-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV32-V-NEXT: addi a0, sp, 8 +; RV32-V-NEXT: vlse64.v v9, (a0), zero +; RV32-V-NEXT: vmulh.vv v8, v8, v9 +; RV32-V-NEXT: li a0, 63 +; RV32-V-NEXT: vsrl.vx v9, v8, a0 +; RV32-V-NEXT: vsra.vi v8, v8, 1 +; RV32-V-NEXT: vadd.vv v8, v8, v9 +; RV32-V-NEXT: addi sp, sp, 16 +; RV32-V-NEXT: ret ; -; RV64-LABEL: vdiv_vi_nxv1i64_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI58_0) -; 
RV64-NEXT: ld a0, %lo(.LCPI58_0)(a0) -; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, mu -; RV64-NEXT: vmulh.vx v8, v8, a0 -; RV64-NEXT: li a0, 63 -; RV64-NEXT: vsrl.vx v9, v8, a0 -; RV64-NEXT: vsra.vi v8, v8, 1 -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: ret +; ZVE64X-LABEL: vdiv_vi_nxv1i64_0: +; ZVE64X: # %bb.0: +; ZVE64X-NEXT: li a0, -7 +; ZVE64X-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; ZVE64X-NEXT: vdiv.vx v8, v8, a0 +; ZVE64X-NEXT: ret +; +; RV64-V-LABEL: vdiv_vi_nxv1i64_0: +; RV64-V: # %bb.0: +; RV64-V-NEXT: lui a0, %hi(.LCPI58_0) +; RV64-V-NEXT: ld a0, %lo(.LCPI58_0)(a0) +; RV64-V-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; RV64-V-NEXT: vmulh.vx v8, v8, a0 +; RV64-V-NEXT: li a0, 63 +; RV64-V-NEXT: vsrl.vx v9, v8, a0 +; RV64-V-NEXT: vsra.vi v8, v8, 1 +; RV64-V-NEXT: vadd.vv v8, v8, v9 +; RV64-V-NEXT: ret %head = insertelement undef, i64 -7, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = sdiv %va, %splat @@ -969,38 +978,45 @@ } define @vdiv_vi_nxv2i64_0( %va) { -; RV32-LABEL: vdiv_vi_nxv2i64_0: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: lui a0, 748983 -; RV32-NEXT: addi a0, a0, -586 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: lui a0, 898779 -; RV32-NEXT: addi a0, a0, 1755 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vmulh.vv v8, v8, v10 -; RV32-NEXT: li a0, 63 -; RV32-NEXT: vsrl.vx v10, v8, a0 -; RV32-NEXT: vsra.vi v8, v8, 1 -; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret +; RV32-V-LABEL: vdiv_vi_nxv2i64_0: +; RV32-V: # %bb.0: +; RV32-V-NEXT: addi sp, sp, -16 +; RV32-V-NEXT: .cfi_def_cfa_offset 16 +; RV32-V-NEXT: lui a0, 748983 +; RV32-V-NEXT: addi a0, a0, -586 +; RV32-V-NEXT: sw a0, 12(sp) +; RV32-V-NEXT: lui a0, 898779 +; RV32-V-NEXT: addi a0, a0, 1755 +; RV32-V-NEXT: sw a0, 8(sp) +; RV32-V-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; 
RV32-V-NEXT: addi a0, sp, 8 +; RV32-V-NEXT: vlse64.v v10, (a0), zero +; RV32-V-NEXT: vmulh.vv v8, v8, v10 +; RV32-V-NEXT: li a0, 63 +; RV32-V-NEXT: vsrl.vx v10, v8, a0 +; RV32-V-NEXT: vsra.vi v8, v8, 1 +; RV32-V-NEXT: vadd.vv v8, v8, v10 +; RV32-V-NEXT: addi sp, sp, 16 +; RV32-V-NEXT: ret ; -; RV64-LABEL: vdiv_vi_nxv2i64_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI61_0) -; RV64-NEXT: ld a0, %lo(.LCPI61_0)(a0) -; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, mu -; RV64-NEXT: vmulh.vx v8, v8, a0 -; RV64-NEXT: li a0, 63 -; RV64-NEXT: vsrl.vx v10, v8, a0 -; RV64-NEXT: vsra.vi v8, v8, 1 -; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: ret +; ZVE64X-LABEL: vdiv_vi_nxv2i64_0: +; ZVE64X: # %bb.0: +; ZVE64X-NEXT: li a0, -7 +; ZVE64X-NEXT: vsetvli a1, zero, e64, m2, ta, mu +; ZVE64X-NEXT: vdiv.vx v8, v8, a0 +; ZVE64X-NEXT: ret +; +; RV64-V-LABEL: vdiv_vi_nxv2i64_0: +; RV64-V: # %bb.0: +; RV64-V-NEXT: lui a0, %hi(.LCPI61_0) +; RV64-V-NEXT: ld a0, %lo(.LCPI61_0)(a0) +; RV64-V-NEXT: vsetvli a1, zero, e64, m2, ta, mu +; RV64-V-NEXT: vmulh.vx v8, v8, a0 +; RV64-V-NEXT: li a0, 63 +; RV64-V-NEXT: vsrl.vx v10, v8, a0 +; RV64-V-NEXT: vsra.vi v8, v8, 1 +; RV64-V-NEXT: vadd.vv v8, v8, v10 +; RV64-V-NEXT: ret %head = insertelement undef, i64 -7, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = sdiv %va, %splat @@ -1043,38 +1059,45 @@ } define @vdiv_vi_nxv4i64_0( %va) { -; RV32-LABEL: vdiv_vi_nxv4i64_0: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: lui a0, 748983 -; RV32-NEXT: addi a0, a0, -586 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: lui a0, 898779 -; RV32-NEXT: addi a0, a0, 1755 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmulh.vv v8, v8, v12 -; RV32-NEXT: li a0, 63 -; RV32-NEXT: vsrl.vx v12, v8, a0 -; RV32-NEXT: vsra.vi v8, v8, 1 -; RV32-NEXT: vadd.vv v8, v8, v12 -; RV32-NEXT: addi sp, sp, 16 -; 
RV32-NEXT: ret +; RV32-V-LABEL: vdiv_vi_nxv4i64_0: +; RV32-V: # %bb.0: +; RV32-V-NEXT: addi sp, sp, -16 +; RV32-V-NEXT: .cfi_def_cfa_offset 16 +; RV32-V-NEXT: lui a0, 748983 +; RV32-V-NEXT: addi a0, a0, -586 +; RV32-V-NEXT: sw a0, 12(sp) +; RV32-V-NEXT: lui a0, 898779 +; RV32-V-NEXT: addi a0, a0, 1755 +; RV32-V-NEXT: sw a0, 8(sp) +; RV32-V-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV32-V-NEXT: addi a0, sp, 8 +; RV32-V-NEXT: vlse64.v v12, (a0), zero +; RV32-V-NEXT: vmulh.vv v8, v8, v12 +; RV32-V-NEXT: li a0, 63 +; RV32-V-NEXT: vsrl.vx v12, v8, a0 +; RV32-V-NEXT: vsra.vi v8, v8, 1 +; RV32-V-NEXT: vadd.vv v8, v8, v12 +; RV32-V-NEXT: addi sp, sp, 16 +; RV32-V-NEXT: ret ; -; RV64-LABEL: vdiv_vi_nxv4i64_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI64_0) -; RV64-NEXT: ld a0, %lo(.LCPI64_0)(a0) -; RV64-NEXT: vsetvli a1, zero, e64, m4, ta, mu -; RV64-NEXT: vmulh.vx v8, v8, a0 -; RV64-NEXT: li a0, 63 -; RV64-NEXT: vsrl.vx v12, v8, a0 -; RV64-NEXT: vsra.vi v8, v8, 1 -; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: ret +; ZVE64X-LABEL: vdiv_vi_nxv4i64_0: +; ZVE64X: # %bb.0: +; ZVE64X-NEXT: li a0, -7 +; ZVE64X-NEXT: vsetvli a1, zero, e64, m4, ta, mu +; ZVE64X-NEXT: vdiv.vx v8, v8, a0 +; ZVE64X-NEXT: ret +; +; RV64-V-LABEL: vdiv_vi_nxv4i64_0: +; RV64-V: # %bb.0: +; RV64-V-NEXT: lui a0, %hi(.LCPI64_0) +; RV64-V-NEXT: ld a0, %lo(.LCPI64_0)(a0) +; RV64-V-NEXT: vsetvli a1, zero, e64, m4, ta, mu +; RV64-V-NEXT: vmulh.vx v8, v8, a0 +; RV64-V-NEXT: li a0, 63 +; RV64-V-NEXT: vsrl.vx v12, v8, a0 +; RV64-V-NEXT: vsra.vi v8, v8, 1 +; RV64-V-NEXT: vadd.vv v8, v8, v12 +; RV64-V-NEXT: ret %head = insertelement undef, i64 -7, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = sdiv %va, %splat @@ -1117,41 +1140,47 @@ } define @vdiv_vi_nxv8i64_0( %va) { -; RV32-LABEL: vdiv_vi_nxv8i64_0: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: lui a0, 748983 -; RV32-NEXT: addi a0, a0, -586 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: 
lui a0, 898779 -; RV32-NEXT: addi a0, a0, 1755 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vmulh.vv v8, v8, v16 -; RV32-NEXT: li a0, 63 -; RV32-NEXT: vsrl.vx v16, v8, a0 -; RV32-NEXT: vsra.vi v8, v8, 1 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret +; RV32-V-LABEL: vdiv_vi_nxv8i64_0: +; RV32-V: # %bb.0: +; RV32-V-NEXT: addi sp, sp, -16 +; RV32-V-NEXT: .cfi_def_cfa_offset 16 +; RV32-V-NEXT: lui a0, 748983 +; RV32-V-NEXT: addi a0, a0, -586 +; RV32-V-NEXT: sw a0, 12(sp) +; RV32-V-NEXT: lui a0, 898779 +; RV32-V-NEXT: addi a0, a0, 1755 +; RV32-V-NEXT: sw a0, 8(sp) +; RV32-V-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV32-V-NEXT: addi a0, sp, 8 +; RV32-V-NEXT: vlse64.v v16, (a0), zero +; RV32-V-NEXT: vmulh.vv v8, v8, v16 +; RV32-V-NEXT: li a0, 63 +; RV32-V-NEXT: vsrl.vx v16, v8, a0 +; RV32-V-NEXT: vsra.vi v8, v8, 1 +; RV32-V-NEXT: vadd.vv v8, v8, v16 +; RV32-V-NEXT: addi sp, sp, 16 +; RV32-V-NEXT: ret ; -; RV64-LABEL: vdiv_vi_nxv8i64_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI67_0) -; RV64-NEXT: ld a0, %lo(.LCPI67_0)(a0) -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV64-NEXT: vmulh.vx v8, v8, a0 -; RV64-NEXT: li a0, 63 -; RV64-NEXT: vsrl.vx v16, v8, a0 -; RV64-NEXT: vsra.vi v8, v8, 1 -; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: ret +; ZVE64X-LABEL: vdiv_vi_nxv8i64_0: +; ZVE64X: # %bb.0: +; ZVE64X-NEXT: li a0, -7 +; ZVE64X-NEXT: vsetvli a1, zero, e64, m8, ta, mu +; ZVE64X-NEXT: vdiv.vx v8, v8, a0 +; ZVE64X-NEXT: ret +; +; RV64-V-LABEL: vdiv_vi_nxv8i64_0: +; RV64-V: # %bb.0: +; RV64-V-NEXT: lui a0, %hi(.LCPI67_0) +; RV64-V-NEXT: ld a0, %lo(.LCPI67_0)(a0) +; RV64-V-NEXT: vsetvli a1, zero, e64, m8, ta, mu +; RV64-V-NEXT: vmulh.vx v8, v8, a0 +; RV64-V-NEXT: li a0, 63 +; RV64-V-NEXT: vsrl.vx v16, v8, a0 +; RV64-V-NEXT: vsra.vi v8, v8, 1 +; RV64-V-NEXT: vadd.vv v8, v8, v16 +; RV64-V-NEXT: ret %head = insertelement 
undef, i64 -7, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = sdiv %va, %splat ret %vc } - diff --git a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32-V +; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,ZVE64X +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64-V +; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZVE64X define @vdivu_vv_nxv1i8( %va, %vb) { ; CHECK-LABEL: vdivu_vv_nxv1i8: @@ -820,33 +822,40 @@ } define @vdivu_vi_nxv1i64_0( %va) { -; RV32-LABEL: vdivu_vi_nxv1i64_0: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: lui a0, 131072 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: li a0, 1 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vmulhu.vv v8, v8, v9 -; RV32-NEXT: li a0, 61 -; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret +; RV32-V-LABEL: vdivu_vi_nxv1i64_0: +; RV32-V: # %bb.0: +; RV32-V-NEXT: addi sp, sp, -16 +; RV32-V-NEXT: .cfi_def_cfa_offset 16 +; RV32-V-NEXT: lui a0, 131072 +; RV32-V-NEXT: sw a0, 12(sp) +; RV32-V-NEXT: li a0, 1 +; RV32-V-NEXT: sw a0, 8(sp) +; RV32-V-NEXT: vsetvli a0, zero, e64, m1, ta, 
mu +; RV32-V-NEXT: addi a0, sp, 8 +; RV32-V-NEXT: vlse64.v v9, (a0), zero +; RV32-V-NEXT: vmulhu.vv v8, v8, v9 +; RV32-V-NEXT: li a0, 61 +; RV32-V-NEXT: vsrl.vx v8, v8, a0 +; RV32-V-NEXT: addi sp, sp, 16 +; RV32-V-NEXT: ret ; -; RV64-LABEL: vdivu_vi_nxv1i64_0: -; RV64: # %bb.0: -; RV64-NEXT: li a0, 1 -; RV64-NEXT: slli a0, a0, 61 -; RV64-NEXT: addi a0, a0, 1 -; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, mu -; RV64-NEXT: vmulhu.vx v8, v8, a0 -; RV64-NEXT: li a0, 61 -; RV64-NEXT: vsrl.vx v8, v8, a0 -; RV64-NEXT: ret +; ZVE64X-LABEL: vdivu_vi_nxv1i64_0: +; ZVE64X: # %bb.0: +; ZVE64X-NEXT: li a0, -7 +; ZVE64X-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; ZVE64X-NEXT: vdivu.vx v8, v8, a0 +; ZVE64X-NEXT: ret +; +; RV64-V-LABEL: vdivu_vi_nxv1i64_0: +; RV64-V: # %bb.0: +; RV64-V-NEXT: li a0, 1 +; RV64-V-NEXT: slli a0, a0, 61 +; RV64-V-NEXT: addi a0, a0, 1 +; RV64-V-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; RV64-V-NEXT: vmulhu.vx v8, v8, a0 +; RV64-V-NEXT: li a0, 61 +; RV64-V-NEXT: vsrl.vx v8, v8, a0 +; RV64-V-NEXT: ret %head = insertelement undef, i64 -7, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = udiv %va, %splat @@ -916,33 +925,40 @@ } define @vdivu_vi_nxv2i64_0( %va) { -; RV32-LABEL: vdivu_vi_nxv2i64_0: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: lui a0, 131072 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: li a0, 1 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vmulhu.vv v8, v8, v10 -; RV32-NEXT: li a0, 61 -; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret +; RV32-V-LABEL: vdivu_vi_nxv2i64_0: +; RV32-V: # %bb.0: +; RV32-V-NEXT: addi sp, sp, -16 +; RV32-V-NEXT: .cfi_def_cfa_offset 16 +; RV32-V-NEXT: lui a0, 131072 +; RV32-V-NEXT: sw a0, 12(sp) +; RV32-V-NEXT: li a0, 1 +; RV32-V-NEXT: sw a0, 8(sp) +; RV32-V-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; RV32-V-NEXT: 
addi a0, sp, 8 +; RV32-V-NEXT: vlse64.v v10, (a0), zero +; RV32-V-NEXT: vmulhu.vv v8, v8, v10 +; RV32-V-NEXT: li a0, 61 +; RV32-V-NEXT: vsrl.vx v8, v8, a0 +; RV32-V-NEXT: addi sp, sp, 16 +; RV32-V-NEXT: ret ; -; RV64-LABEL: vdivu_vi_nxv2i64_0: -; RV64: # %bb.0: -; RV64-NEXT: li a0, 1 -; RV64-NEXT: slli a0, a0, 61 -; RV64-NEXT: addi a0, a0, 1 -; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, mu -; RV64-NEXT: vmulhu.vx v8, v8, a0 -; RV64-NEXT: li a0, 61 -; RV64-NEXT: vsrl.vx v8, v8, a0 -; RV64-NEXT: ret +; ZVE64X-LABEL: vdivu_vi_nxv2i64_0: +; ZVE64X: # %bb.0: +; ZVE64X-NEXT: li a0, -7 +; ZVE64X-NEXT: vsetvli a1, zero, e64, m2, ta, mu +; ZVE64X-NEXT: vdivu.vx v8, v8, a0 +; ZVE64X-NEXT: ret +; +; RV64-V-LABEL: vdivu_vi_nxv2i64_0: +; RV64-V: # %bb.0: +; RV64-V-NEXT: li a0, 1 +; RV64-V-NEXT: slli a0, a0, 61 +; RV64-V-NEXT: addi a0, a0, 1 +; RV64-V-NEXT: vsetvli a1, zero, e64, m2, ta, mu +; RV64-V-NEXT: vmulhu.vx v8, v8, a0 +; RV64-V-NEXT: li a0, 61 +; RV64-V-NEXT: vsrl.vx v8, v8, a0 +; RV64-V-NEXT: ret %head = insertelement undef, i64 -7, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = udiv %va, %splat @@ -1012,33 +1028,40 @@ } define @vdivu_vi_nxv4i64_0( %va) { -; RV32-LABEL: vdivu_vi_nxv4i64_0: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: lui a0, 131072 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: li a0, 1 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmulhu.vv v8, v8, v12 -; RV32-NEXT: li a0, 61 -; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret +; RV32-V-LABEL: vdivu_vi_nxv4i64_0: +; RV32-V: # %bb.0: +; RV32-V-NEXT: addi sp, sp, -16 +; RV32-V-NEXT: .cfi_def_cfa_offset 16 +; RV32-V-NEXT: lui a0, 131072 +; RV32-V-NEXT: sw a0, 12(sp) +; RV32-V-NEXT: li a0, 1 +; RV32-V-NEXT: sw a0, 8(sp) +; RV32-V-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV32-V-NEXT: addi a0, sp, 8 +; 
RV32-V-NEXT: vlse64.v v12, (a0), zero +; RV32-V-NEXT: vmulhu.vv v8, v8, v12 +; RV32-V-NEXT: li a0, 61 +; RV32-V-NEXT: vsrl.vx v8, v8, a0 +; RV32-V-NEXT: addi sp, sp, 16 +; RV32-V-NEXT: ret ; -; RV64-LABEL: vdivu_vi_nxv4i64_0: -; RV64: # %bb.0: -; RV64-NEXT: li a0, 1 -; RV64-NEXT: slli a0, a0, 61 -; RV64-NEXT: addi a0, a0, 1 -; RV64-NEXT: vsetvli a1, zero, e64, m4, ta, mu -; RV64-NEXT: vmulhu.vx v8, v8, a0 -; RV64-NEXT: li a0, 61 -; RV64-NEXT: vsrl.vx v8, v8, a0 -; RV64-NEXT: ret +; ZVE64X-LABEL: vdivu_vi_nxv4i64_0: +; ZVE64X: # %bb.0: +; ZVE64X-NEXT: li a0, -7 +; ZVE64X-NEXT: vsetvli a1, zero, e64, m4, ta, mu +; ZVE64X-NEXT: vdivu.vx v8, v8, a0 +; ZVE64X-NEXT: ret +; +; RV64-V-LABEL: vdivu_vi_nxv4i64_0: +; RV64-V: # %bb.0: +; RV64-V-NEXT: li a0, 1 +; RV64-V-NEXT: slli a0, a0, 61 +; RV64-V-NEXT: addi a0, a0, 1 +; RV64-V-NEXT: vsetvli a1, zero, e64, m4, ta, mu +; RV64-V-NEXT: vmulhu.vx v8, v8, a0 +; RV64-V-NEXT: li a0, 61 +; RV64-V-NEXT: vsrl.vx v8, v8, a0 +; RV64-V-NEXT: ret %head = insertelement undef, i64 -7, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = udiv %va, %splat @@ -1108,33 +1131,40 @@ } define @vdivu_vi_nxv8i64_0( %va) { -; RV32-LABEL: vdivu_vi_nxv8i64_0: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: lui a0, 131072 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: li a0, 1 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vmulhu.vv v8, v8, v16 -; RV32-NEXT: li a0, 61 -; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret +; RV32-V-LABEL: vdivu_vi_nxv8i64_0: +; RV32-V: # %bb.0: +; RV32-V-NEXT: addi sp, sp, -16 +; RV32-V-NEXT: .cfi_def_cfa_offset 16 +; RV32-V-NEXT: lui a0, 131072 +; RV32-V-NEXT: sw a0, 12(sp) +; RV32-V-NEXT: li a0, 1 +; RV32-V-NEXT: sw a0, 8(sp) +; RV32-V-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV32-V-NEXT: addi a0, sp, 8 +; RV32-V-NEXT: 
vlse64.v v16, (a0), zero +; RV32-V-NEXT: vmulhu.vv v8, v8, v16 +; RV32-V-NEXT: li a0, 61 +; RV32-V-NEXT: vsrl.vx v8, v8, a0 +; RV32-V-NEXT: addi sp, sp, 16 +; RV32-V-NEXT: ret ; -; RV64-LABEL: vdivu_vi_nxv8i64_0: -; RV64: # %bb.0: -; RV64-NEXT: li a0, 1 -; RV64-NEXT: slli a0, a0, 61 -; RV64-NEXT: addi a0, a0, 1 -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV64-NEXT: vmulhu.vx v8, v8, a0 -; RV64-NEXT: li a0, 61 -; RV64-NEXT: vsrl.vx v8, v8, a0 -; RV64-NEXT: ret +; ZVE64X-LABEL: vdivu_vi_nxv8i64_0: +; ZVE64X: # %bb.0: +; ZVE64X-NEXT: li a0, -7 +; ZVE64X-NEXT: vsetvli a1, zero, e64, m8, ta, mu +; ZVE64X-NEXT: vdivu.vx v8, v8, a0 +; ZVE64X-NEXT: ret +; +; RV64-V-LABEL: vdivu_vi_nxv8i64_0: +; RV64-V: # %bb.0: +; RV64-V-NEXT: li a0, 1 +; RV64-V-NEXT: slli a0, a0, 61 +; RV64-V-NEXT: addi a0, a0, 1 +; RV64-V-NEXT: vsetvli a1, zero, e64, m8, ta, mu +; RV64-V-NEXT: vmulhu.vx v8, v8, a0 +; RV64-V-NEXT: li a0, 61 +; RV64-V-NEXT: vsrl.vx v8, v8, a0 +; RV64-V-NEXT: ret %head = insertelement undef, i64 -7, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = udiv %va, %splat diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32-V +; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,ZVE64X +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64-V +; RUN: llc -mtriple=riscv64 
-mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZVE64X define @vrem_vv_nxv1i8( %va, %vb) { ; CHECK-LABEL: vrem_vv_nxv1i8: @@ -929,42 +931,49 @@ } define @vrem_vi_nxv1i64_0( %va) { -; RV32-LABEL: vrem_vi_nxv1i64_0: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: lui a0, 748983 -; RV32-NEXT: addi a0, a0, -586 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: lui a0, 898779 -; RV32-NEXT: addi a0, a0, 1755 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vmulh.vv v9, v8, v9 -; RV32-NEXT: li a0, 63 -; RV32-NEXT: vsrl.vx v10, v9, a0 -; RV32-NEXT: vsra.vi v9, v9, 1 -; RV32-NEXT: vadd.vv v9, v9, v10 -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vnmsac.vx v8, a0, v9 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret +; RV32-V-LABEL: vrem_vi_nxv1i64_0: +; RV32-V: # %bb.0: +; RV32-V-NEXT: addi sp, sp, -16 +; RV32-V-NEXT: .cfi_def_cfa_offset 16 +; RV32-V-NEXT: lui a0, 748983 +; RV32-V-NEXT: addi a0, a0, -586 +; RV32-V-NEXT: sw a0, 12(sp) +; RV32-V-NEXT: lui a0, 898779 +; RV32-V-NEXT: addi a0, a0, 1755 +; RV32-V-NEXT: sw a0, 8(sp) +; RV32-V-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV32-V-NEXT: addi a0, sp, 8 +; RV32-V-NEXT: vlse64.v v9, (a0), zero +; RV32-V-NEXT: vmulh.vv v9, v8, v9 +; RV32-V-NEXT: li a0, 63 +; RV32-V-NEXT: vsrl.vx v10, v9, a0 +; RV32-V-NEXT: vsra.vi v9, v9, 1 +; RV32-V-NEXT: vadd.vv v9, v9, v10 +; RV32-V-NEXT: li a0, -7 +; RV32-V-NEXT: vnmsac.vx v8, a0, v9 +; RV32-V-NEXT: addi sp, sp, 16 +; RV32-V-NEXT: ret ; -; RV64-LABEL: vrem_vi_nxv1i64_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI56_0) -; RV64-NEXT: ld a0, %lo(.LCPI56_0)(a0) -; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, mu -; RV64-NEXT: vmulh.vx v9, v8, a0 -; RV64-NEXT: li a0, 63 -; RV64-NEXT: vsrl.vx v10, v9, a0 -; RV64-NEXT: vsra.vi v9, v9, 1 -; RV64-NEXT: vadd.vv v9, v9, v10 -; RV64-NEXT: li a0, -7 -; RV64-NEXT: vnmsac.vx 
v8, a0, v9 -; RV64-NEXT: ret +; ZVE64X-LABEL: vrem_vi_nxv1i64_0: +; ZVE64X: # %bb.0: +; ZVE64X-NEXT: li a0, -7 +; ZVE64X-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; ZVE64X-NEXT: vrem.vx v8, v8, a0 +; ZVE64X-NEXT: ret +; +; RV64-V-LABEL: vrem_vi_nxv1i64_0: +; RV64-V: # %bb.0: +; RV64-V-NEXT: lui a0, %hi(.LCPI56_0) +; RV64-V-NEXT: ld a0, %lo(.LCPI56_0)(a0) +; RV64-V-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; RV64-V-NEXT: vmulh.vx v9, v8, a0 +; RV64-V-NEXT: li a0, 63 +; RV64-V-NEXT: vsrl.vx v10, v9, a0 +; RV64-V-NEXT: vsra.vi v9, v9, 1 +; RV64-V-NEXT: vadd.vv v9, v9, v10 +; RV64-V-NEXT: li a0, -7 +; RV64-V-NEXT: vnmsac.vx v8, a0, v9 +; RV64-V-NEXT: ret %head = insertelement undef, i64 -7, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = srem %va, %splat @@ -1007,42 +1016,49 @@ } define @vrem_vi_nxv2i64_0( %va) { -; RV32-LABEL: vrem_vi_nxv2i64_0: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: lui a0, 748983 -; RV32-NEXT: addi a0, a0, -586 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: lui a0, 898779 -; RV32-NEXT: addi a0, a0, 1755 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vmulh.vv v10, v8, v10 -; RV32-NEXT: li a0, 63 -; RV32-NEXT: vsrl.vx v12, v10, a0 -; RV32-NEXT: vsra.vi v10, v10, 1 -; RV32-NEXT: vadd.vv v10, v10, v12 -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vnmsac.vx v8, a0, v10 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret +; RV32-V-LABEL: vrem_vi_nxv2i64_0: +; RV32-V: # %bb.0: +; RV32-V-NEXT: addi sp, sp, -16 +; RV32-V-NEXT: .cfi_def_cfa_offset 16 +; RV32-V-NEXT: lui a0, 748983 +; RV32-V-NEXT: addi a0, a0, -586 +; RV32-V-NEXT: sw a0, 12(sp) +; RV32-V-NEXT: lui a0, 898779 +; RV32-V-NEXT: addi a0, a0, 1755 +; RV32-V-NEXT: sw a0, 8(sp) +; RV32-V-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; RV32-V-NEXT: addi a0, sp, 8 +; RV32-V-NEXT: vlse64.v v10, (a0), zero +; RV32-V-NEXT: vmulh.vv v10, v8, v10 
+; RV32-V-NEXT: li a0, 63 +; RV32-V-NEXT: vsrl.vx v12, v10, a0 +; RV32-V-NEXT: vsra.vi v10, v10, 1 +; RV32-V-NEXT: vadd.vv v10, v10, v12 +; RV32-V-NEXT: li a0, -7 +; RV32-V-NEXT: vnmsac.vx v8, a0, v10 +; RV32-V-NEXT: addi sp, sp, 16 +; RV32-V-NEXT: ret ; -; RV64-LABEL: vrem_vi_nxv2i64_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI59_0) -; RV64-NEXT: ld a0, %lo(.LCPI59_0)(a0) -; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, mu -; RV64-NEXT: vmulh.vx v10, v8, a0 -; RV64-NEXT: li a0, 63 -; RV64-NEXT: vsrl.vx v12, v10, a0 -; RV64-NEXT: vsra.vi v10, v10, 1 -; RV64-NEXT: vadd.vv v10, v10, v12 -; RV64-NEXT: li a0, -7 -; RV64-NEXT: vnmsac.vx v8, a0, v10 -; RV64-NEXT: ret +; ZVE64X-LABEL: vrem_vi_nxv2i64_0: +; ZVE64X: # %bb.0: +; ZVE64X-NEXT: li a0, -7 +; ZVE64X-NEXT: vsetvli a1, zero, e64, m2, ta, mu +; ZVE64X-NEXT: vrem.vx v8, v8, a0 +; ZVE64X-NEXT: ret +; +; RV64-V-LABEL: vrem_vi_nxv2i64_0: +; RV64-V: # %bb.0: +; RV64-V-NEXT: lui a0, %hi(.LCPI59_0) +; RV64-V-NEXT: ld a0, %lo(.LCPI59_0)(a0) +; RV64-V-NEXT: vsetvli a1, zero, e64, m2, ta, mu +; RV64-V-NEXT: vmulh.vx v10, v8, a0 +; RV64-V-NEXT: li a0, 63 +; RV64-V-NEXT: vsrl.vx v12, v10, a0 +; RV64-V-NEXT: vsra.vi v10, v10, 1 +; RV64-V-NEXT: vadd.vv v10, v10, v12 +; RV64-V-NEXT: li a0, -7 +; RV64-V-NEXT: vnmsac.vx v8, a0, v10 +; RV64-V-NEXT: ret %head = insertelement undef, i64 -7, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = srem %va, %splat @@ -1085,42 +1101,49 @@ } define @vrem_vi_nxv4i64_0( %va) { -; RV32-LABEL: vrem_vi_nxv4i64_0: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: lui a0, 748983 -; RV32-NEXT: addi a0, a0, -586 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: lui a0, 898779 -; RV32-NEXT: addi a0, a0, 1755 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmulh.vv v12, v8, v12 -; RV32-NEXT: li a0, 63 -; RV32-NEXT: vsrl.vx v16, v12, a0 -; 
RV32-NEXT: vsra.vi v12, v12, 1 -; RV32-NEXT: vadd.vv v12, v12, v16 -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vnmsac.vx v8, a0, v12 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret +; RV32-V-LABEL: vrem_vi_nxv4i64_0: +; RV32-V: # %bb.0: +; RV32-V-NEXT: addi sp, sp, -16 +; RV32-V-NEXT: .cfi_def_cfa_offset 16 +; RV32-V-NEXT: lui a0, 748983 +; RV32-V-NEXT: addi a0, a0, -586 +; RV32-V-NEXT: sw a0, 12(sp) +; RV32-V-NEXT: lui a0, 898779 +; RV32-V-NEXT: addi a0, a0, 1755 +; RV32-V-NEXT: sw a0, 8(sp) +; RV32-V-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV32-V-NEXT: addi a0, sp, 8 +; RV32-V-NEXT: vlse64.v v12, (a0), zero +; RV32-V-NEXT: vmulh.vv v12, v8, v12 +; RV32-V-NEXT: li a0, 63 +; RV32-V-NEXT: vsrl.vx v16, v12, a0 +; RV32-V-NEXT: vsra.vi v12, v12, 1 +; RV32-V-NEXT: vadd.vv v12, v12, v16 +; RV32-V-NEXT: li a0, -7 +; RV32-V-NEXT: vnmsac.vx v8, a0, v12 +; RV32-V-NEXT: addi sp, sp, 16 +; RV32-V-NEXT: ret ; -; RV64-LABEL: vrem_vi_nxv4i64_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI62_0) -; RV64-NEXT: ld a0, %lo(.LCPI62_0)(a0) -; RV64-NEXT: vsetvli a1, zero, e64, m4, ta, mu -; RV64-NEXT: vmulh.vx v12, v8, a0 -; RV64-NEXT: li a0, 63 -; RV64-NEXT: vsrl.vx v16, v12, a0 -; RV64-NEXT: vsra.vi v12, v12, 1 -; RV64-NEXT: vadd.vv v12, v12, v16 -; RV64-NEXT: li a0, -7 -; RV64-NEXT: vnmsac.vx v8, a0, v12 -; RV64-NEXT: ret +; ZVE64X-LABEL: vrem_vi_nxv4i64_0: +; ZVE64X: # %bb.0: +; ZVE64X-NEXT: li a0, -7 +; ZVE64X-NEXT: vsetvli a1, zero, e64, m4, ta, mu +; ZVE64X-NEXT: vrem.vx v8, v8, a0 +; ZVE64X-NEXT: ret +; +; RV64-V-LABEL: vrem_vi_nxv4i64_0: +; RV64-V: # %bb.0: +; RV64-V-NEXT: lui a0, %hi(.LCPI62_0) +; RV64-V-NEXT: ld a0, %lo(.LCPI62_0)(a0) +; RV64-V-NEXT: vsetvli a1, zero, e64, m4, ta, mu +; RV64-V-NEXT: vmulh.vx v12, v8, a0 +; RV64-V-NEXT: li a0, 63 +; RV64-V-NEXT: vsrl.vx v16, v12, a0 +; RV64-V-NEXT: vsra.vi v12, v12, 1 +; RV64-V-NEXT: vadd.vv v12, v12, v16 +; RV64-V-NEXT: li a0, -7 +; RV64-V-NEXT: vnmsac.vx v8, a0, v12 +; RV64-V-NEXT: ret %head = insertelement undef, i64 -7, 
i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = srem %va, %splat @@ -1163,42 +1186,49 @@ } define @vrem_vi_nxv8i64_0( %va) { -; RV32-LABEL: vrem_vi_nxv8i64_0: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: lui a0, 748983 -; RV32-NEXT: addi a0, a0, -586 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: lui a0, 898779 -; RV32-NEXT: addi a0, a0, 1755 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vmulh.vv v16, v8, v16 -; RV32-NEXT: li a0, 63 -; RV32-NEXT: vsrl.vx v24, v16, a0 -; RV32-NEXT: vsra.vi v16, v16, 1 -; RV32-NEXT: vadd.vv v16, v16, v24 -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vnmsac.vx v8, a0, v16 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret +; RV32-V-LABEL: vrem_vi_nxv8i64_0: +; RV32-V: # %bb.0: +; RV32-V-NEXT: addi sp, sp, -16 +; RV32-V-NEXT: .cfi_def_cfa_offset 16 +; RV32-V-NEXT: lui a0, 748983 +; RV32-V-NEXT: addi a0, a0, -586 +; RV32-V-NEXT: sw a0, 12(sp) +; RV32-V-NEXT: lui a0, 898779 +; RV32-V-NEXT: addi a0, a0, 1755 +; RV32-V-NEXT: sw a0, 8(sp) +; RV32-V-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV32-V-NEXT: addi a0, sp, 8 +; RV32-V-NEXT: vlse64.v v16, (a0), zero +; RV32-V-NEXT: vmulh.vv v16, v8, v16 +; RV32-V-NEXT: li a0, 63 +; RV32-V-NEXT: vsrl.vx v24, v16, a0 +; RV32-V-NEXT: vsra.vi v16, v16, 1 +; RV32-V-NEXT: vadd.vv v16, v16, v24 +; RV32-V-NEXT: li a0, -7 +; RV32-V-NEXT: vnmsac.vx v8, a0, v16 +; RV32-V-NEXT: addi sp, sp, 16 +; RV32-V-NEXT: ret ; -; RV64-LABEL: vrem_vi_nxv8i64_0: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI65_0) -; RV64-NEXT: ld a0, %lo(.LCPI65_0)(a0) -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV64-NEXT: vmulh.vx v16, v8, a0 -; RV64-NEXT: li a0, 63 -; RV64-NEXT: vsrl.vx v24, v16, a0 -; RV64-NEXT: vsra.vi v16, v16, 1 -; RV64-NEXT: vadd.vv v16, v16, v24 -; RV64-NEXT: li a0, -7 -; RV64-NEXT: vnmsac.vx v8, a0, v16 -; RV64-NEXT: ret +; ZVE64X-LABEL: 
vrem_vi_nxv8i64_0: +; ZVE64X: # %bb.0: +; ZVE64X-NEXT: li a0, -7 +; ZVE64X-NEXT: vsetvli a1, zero, e64, m8, ta, mu +; ZVE64X-NEXT: vrem.vx v8, v8, a0 +; ZVE64X-NEXT: ret +; +; RV64-V-LABEL: vrem_vi_nxv8i64_0: +; RV64-V: # %bb.0: +; RV64-V-NEXT: lui a0, %hi(.LCPI65_0) +; RV64-V-NEXT: ld a0, %lo(.LCPI65_0)(a0) +; RV64-V-NEXT: vsetvli a1, zero, e64, m8, ta, mu +; RV64-V-NEXT: vmulh.vx v16, v8, a0 +; RV64-V-NEXT: li a0, 63 +; RV64-V-NEXT: vsrl.vx v24, v16, a0 +; RV64-V-NEXT: vsra.vi v16, v16, 1 +; RV64-V-NEXT: vadd.vv v16, v16, v24 +; RV64-V-NEXT: li a0, -7 +; RV64-V-NEXT: vnmsac.vx v8, a0, v16 +; RV64-V-NEXT: ret %head = insertelement undef, i64 -7, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = srem %va, %splat diff --git a/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32-V +; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,ZVE64X +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64-V +; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZVE64X define @vremu_vv_nxv1i8( %va, %vb) { ; CHECK-LABEL: vremu_vv_nxv1i8: @@ -854,37 +856,44 @@ } define @vremu_vi_nxv1i64_0( %va) { -; RV32-LABEL: vremu_vi_nxv1i64_0: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: lui a0, 
131072 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: li a0, 1 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vmulhu.vv v9, v8, v9 -; RV32-NEXT: li a0, 61 -; RV32-NEXT: vsrl.vx v9, v9, a0 -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vnmsac.vx v8, a0, v9 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret +; RV32-V-LABEL: vremu_vi_nxv1i64_0: +; RV32-V: # %bb.0: +; RV32-V-NEXT: addi sp, sp, -16 +; RV32-V-NEXT: .cfi_def_cfa_offset 16 +; RV32-V-NEXT: lui a0, 131072 +; RV32-V-NEXT: sw a0, 12(sp) +; RV32-V-NEXT: li a0, 1 +; RV32-V-NEXT: sw a0, 8(sp) +; RV32-V-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV32-V-NEXT: addi a0, sp, 8 +; RV32-V-NEXT: vlse64.v v9, (a0), zero +; RV32-V-NEXT: vmulhu.vv v9, v8, v9 +; RV32-V-NEXT: li a0, 61 +; RV32-V-NEXT: vsrl.vx v9, v9, a0 +; RV32-V-NEXT: li a0, -7 +; RV32-V-NEXT: vnmsac.vx v8, a0, v9 +; RV32-V-NEXT: addi sp, sp, 16 +; RV32-V-NEXT: ret ; -; RV64-LABEL: vremu_vi_nxv1i64_0: -; RV64: # %bb.0: -; RV64-NEXT: li a0, 1 -; RV64-NEXT: slli a0, a0, 61 -; RV64-NEXT: addi a0, a0, 1 -; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, mu -; RV64-NEXT: vmulhu.vx v9, v8, a0 -; RV64-NEXT: li a0, 61 -; RV64-NEXT: vsrl.vx v9, v9, a0 -; RV64-NEXT: li a0, -7 -; RV64-NEXT: vnmsac.vx v8, a0, v9 -; RV64-NEXT: ret +; ZVE64X-LABEL: vremu_vi_nxv1i64_0: +; ZVE64X: # %bb.0: +; ZVE64X-NEXT: li a0, -7 +; ZVE64X-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; ZVE64X-NEXT: vremu.vx v8, v8, a0 +; ZVE64X-NEXT: ret +; +; RV64-V-LABEL: vremu_vi_nxv1i64_0: +; RV64-V: # %bb.0: +; RV64-V-NEXT: li a0, 1 +; RV64-V-NEXT: slli a0, a0, 61 +; RV64-V-NEXT: addi a0, a0, 1 +; RV64-V-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; RV64-V-NEXT: vmulhu.vx v9, v8, a0 +; RV64-V-NEXT: li a0, 61 +; RV64-V-NEXT: vsrl.vx v9, v9, a0 +; RV64-V-NEXT: li a0, -7 +; RV64-V-NEXT: vnmsac.vx v8, a0, v9 +; RV64-V-NEXT: ret %head = insertelement undef, i64 -7, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc 
= urem %va, %splat @@ -958,37 +967,44 @@ } define @vremu_vi_nxv2i64_0( %va) { -; RV32-LABEL: vremu_vi_nxv2i64_0: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: lui a0, 131072 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: li a0, 1 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vmulhu.vv v10, v8, v10 -; RV32-NEXT: li a0, 61 -; RV32-NEXT: vsrl.vx v10, v10, a0 -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vnmsac.vx v8, a0, v10 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret +; RV32-V-LABEL: vremu_vi_nxv2i64_0: +; RV32-V: # %bb.0: +; RV32-V-NEXT: addi sp, sp, -16 +; RV32-V-NEXT: .cfi_def_cfa_offset 16 +; RV32-V-NEXT: lui a0, 131072 +; RV32-V-NEXT: sw a0, 12(sp) +; RV32-V-NEXT: li a0, 1 +; RV32-V-NEXT: sw a0, 8(sp) +; RV32-V-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; RV32-V-NEXT: addi a0, sp, 8 +; RV32-V-NEXT: vlse64.v v10, (a0), zero +; RV32-V-NEXT: vmulhu.vv v10, v8, v10 +; RV32-V-NEXT: li a0, 61 +; RV32-V-NEXT: vsrl.vx v10, v10, a0 +; RV32-V-NEXT: li a0, -7 +; RV32-V-NEXT: vnmsac.vx v8, a0, v10 +; RV32-V-NEXT: addi sp, sp, 16 +; RV32-V-NEXT: ret ; -; RV64-LABEL: vremu_vi_nxv2i64_0: -; RV64: # %bb.0: -; RV64-NEXT: li a0, 1 -; RV64-NEXT: slli a0, a0, 61 -; RV64-NEXT: addi a0, a0, 1 -; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, mu -; RV64-NEXT: vmulhu.vx v10, v8, a0 -; RV64-NEXT: li a0, 61 -; RV64-NEXT: vsrl.vx v10, v10, a0 -; RV64-NEXT: li a0, -7 -; RV64-NEXT: vnmsac.vx v8, a0, v10 -; RV64-NEXT: ret +; ZVE64X-LABEL: vremu_vi_nxv2i64_0: +; ZVE64X: # %bb.0: +; ZVE64X-NEXT: li a0, -7 +; ZVE64X-NEXT: vsetvli a1, zero, e64, m2, ta, mu +; ZVE64X-NEXT: vremu.vx v8, v8, a0 +; ZVE64X-NEXT: ret +; +; RV64-V-LABEL: vremu_vi_nxv2i64_0: +; RV64-V: # %bb.0: +; RV64-V-NEXT: li a0, 1 +; RV64-V-NEXT: slli a0, a0, 61 +; RV64-V-NEXT: addi a0, a0, 1 +; RV64-V-NEXT: vsetvli a1, zero, e64, m2, ta, mu +; RV64-V-NEXT: vmulhu.vx v10, v8, a0 +; 
RV64-V-NEXT: li a0, 61 +; RV64-V-NEXT: vsrl.vx v10, v10, a0 +; RV64-V-NEXT: li a0, -7 +; RV64-V-NEXT: vnmsac.vx v8, a0, v10 +; RV64-V-NEXT: ret %head = insertelement undef, i64 -7, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = urem %va, %splat @@ -1062,37 +1078,44 @@ } define @vremu_vi_nxv4i64_0( %va) { -; RV32-LABEL: vremu_vi_nxv4i64_0: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: lui a0, 131072 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: li a0, 1 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmulhu.vv v12, v8, v12 -; RV32-NEXT: li a0, 61 -; RV32-NEXT: vsrl.vx v12, v12, a0 -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vnmsac.vx v8, a0, v12 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret +; RV32-V-LABEL: vremu_vi_nxv4i64_0: +; RV32-V: # %bb.0: +; RV32-V-NEXT: addi sp, sp, -16 +; RV32-V-NEXT: .cfi_def_cfa_offset 16 +; RV32-V-NEXT: lui a0, 131072 +; RV32-V-NEXT: sw a0, 12(sp) +; RV32-V-NEXT: li a0, 1 +; RV32-V-NEXT: sw a0, 8(sp) +; RV32-V-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV32-V-NEXT: addi a0, sp, 8 +; RV32-V-NEXT: vlse64.v v12, (a0), zero +; RV32-V-NEXT: vmulhu.vv v12, v8, v12 +; RV32-V-NEXT: li a0, 61 +; RV32-V-NEXT: vsrl.vx v12, v12, a0 +; RV32-V-NEXT: li a0, -7 +; RV32-V-NEXT: vnmsac.vx v8, a0, v12 +; RV32-V-NEXT: addi sp, sp, 16 +; RV32-V-NEXT: ret ; -; RV64-LABEL: vremu_vi_nxv4i64_0: -; RV64: # %bb.0: -; RV64-NEXT: li a0, 1 -; RV64-NEXT: slli a0, a0, 61 -; RV64-NEXT: addi a0, a0, 1 -; RV64-NEXT: vsetvli a1, zero, e64, m4, ta, mu -; RV64-NEXT: vmulhu.vx v12, v8, a0 -; RV64-NEXT: li a0, 61 -; RV64-NEXT: vsrl.vx v12, v12, a0 -; RV64-NEXT: li a0, -7 -; RV64-NEXT: vnmsac.vx v8, a0, v12 -; RV64-NEXT: ret +; ZVE64X-LABEL: vremu_vi_nxv4i64_0: +; ZVE64X: # %bb.0: +; ZVE64X-NEXT: li a0, -7 +; ZVE64X-NEXT: vsetvli a1, zero, e64, m4, ta, mu +; ZVE64X-NEXT: vremu.vx v8, v8, a0 +; ZVE64X-NEXT: 
ret +; +; RV64-V-LABEL: vremu_vi_nxv4i64_0: +; RV64-V: # %bb.0: +; RV64-V-NEXT: li a0, 1 +; RV64-V-NEXT: slli a0, a0, 61 +; RV64-V-NEXT: addi a0, a0, 1 +; RV64-V-NEXT: vsetvli a1, zero, e64, m4, ta, mu +; RV64-V-NEXT: vmulhu.vx v12, v8, a0 +; RV64-V-NEXT: li a0, 61 +; RV64-V-NEXT: vsrl.vx v12, v12, a0 +; RV64-V-NEXT: li a0, -7 +; RV64-V-NEXT: vnmsac.vx v8, a0, v12 +; RV64-V-NEXT: ret %head = insertelement undef, i64 -7, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = urem %va, %splat @@ -1166,37 +1189,44 @@ } define @vremu_vi_nxv8i64_0( %va) { -; RV32-LABEL: vremu_vi_nxv8i64_0: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: lui a0, 131072 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: li a0, 1 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vmulhu.vv v16, v8, v16 -; RV32-NEXT: li a0, 61 -; RV32-NEXT: vsrl.vx v16, v16, a0 -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vnmsac.vx v8, a0, v16 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret +; RV32-V-LABEL: vremu_vi_nxv8i64_0: +; RV32-V: # %bb.0: +; RV32-V-NEXT: addi sp, sp, -16 +; RV32-V-NEXT: .cfi_def_cfa_offset 16 +; RV32-V-NEXT: lui a0, 131072 +; RV32-V-NEXT: sw a0, 12(sp) +; RV32-V-NEXT: li a0, 1 +; RV32-V-NEXT: sw a0, 8(sp) +; RV32-V-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV32-V-NEXT: addi a0, sp, 8 +; RV32-V-NEXT: vlse64.v v16, (a0), zero +; RV32-V-NEXT: vmulhu.vv v16, v8, v16 +; RV32-V-NEXT: li a0, 61 +; RV32-V-NEXT: vsrl.vx v16, v16, a0 +; RV32-V-NEXT: li a0, -7 +; RV32-V-NEXT: vnmsac.vx v8, a0, v16 +; RV32-V-NEXT: addi sp, sp, 16 +; RV32-V-NEXT: ret ; -; RV64-LABEL: vremu_vi_nxv8i64_0: -; RV64: # %bb.0: -; RV64-NEXT: li a0, 1 -; RV64-NEXT: slli a0, a0, 61 -; RV64-NEXT: addi a0, a0, 1 -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV64-NEXT: vmulhu.vx v16, v8, a0 -; RV64-NEXT: li a0, 61 -; RV64-NEXT: vsrl.vx v16, v16, a0 -; RV64-NEXT: li a0, 
-7 -; RV64-NEXT: vnmsac.vx v8, a0, v16 -; RV64-NEXT: ret +; ZVE64X-LABEL: vremu_vi_nxv8i64_0: +; ZVE64X: # %bb.0: +; ZVE64X-NEXT: li a0, -7 +; ZVE64X-NEXT: vsetvli a1, zero, e64, m8, ta, mu +; ZVE64X-NEXT: vremu.vx v8, v8, a0 +; ZVE64X-NEXT: ret +; +; RV64-V-LABEL: vremu_vi_nxv8i64_0: +; RV64-V: # %bb.0: +; RV64-V-NEXT: li a0, 1 +; RV64-V-NEXT: slli a0, a0, 61 +; RV64-V-NEXT: addi a0, a0, 1 +; RV64-V-NEXT: vsetvli a1, zero, e64, m8, ta, mu +; RV64-V-NEXT: vmulhu.vx v16, v8, a0 +; RV64-V-NEXT: li a0, 61 +; RV64-V-NEXT: vsrl.vx v16, v16, a0 +; RV64-V-NEXT: li a0, -7 +; RV64-V-NEXT: vnmsac.vx v8, a0, v16 +; RV64-V-NEXT: ret %head = insertelement undef, i64 -7, i32 0 %splat = shufflevector %head, undef, zeroinitializer %vc = urem %va, %splat diff --git a/llvm/test/Transforms/IROutliner/gvn-output-set-overload.ll b/llvm/test/Transforms/IROutliner/gvn-output-set-overload.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/IROutliner/gvn-output-set-overload.ll @@ -0,0 +1,122 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; Show that we do differentiate between outputs of the region stored in PHINodes +; versus those stored outside of PHINodes. 
+ +define void @function1(i32* %a, i32* %b) { +entry: + %0 = alloca i32, align 4 + %c = load i32, i32* %0, align 4 + br label %test1 +test1: + %e = load i32, i32* %0, align 4 + br i1 true, label %first, label %test +test: + %d = load i32, i32* %0, align 4 + br i1 true, label %first, label %next +first: + %1 = phi i32 [ %c, %test ], [ %e, %test1 ] + ret void +next: + ret void +} + +define void @function2(i32* %a, i32* %b) { +entry: + %0 = alloca i32, align 4 + %c = load i32, i32* %0, align 4 + br label %test1 +test1: + %e = load i32, i32* %0, align 4 + br i1 true, label %first, label %test +test: + %d = load i32, i32* %0, align 4 + br i1 true, label %first, label %next +first: + ret void +next: + %1 = add i32 %c, 1 + %2 = add i32 %e, 1 + ret void +} +; CHECK-LABEL: @function1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[DOTCE_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[DOTCE_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32* [[DOTCE_LOC]], i32* null, i32 0) +; CHECK-NEXT: [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: br i1 [[TMP1]], label [[FIRST:%.*]], label [[NEXT:%.*]] +; CHECK: first: +; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ] +; CHECK-NEXT: ret void +; CHECK: next: +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: @function2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[E_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[C_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[E_LOC]] to i8* +; CHECK-NEXT: call void 
@llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32* [[C_LOC]], i32* [[E_LOC]], i32 1) +; CHECK-NEXT: [[C_RELOAD:%.*]] = load i32, i32* [[C_LOC]], align 4 +; CHECK-NEXT: [[E_RELOAD:%.*]] = load i32, i32* [[E_LOC]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]]) +; CHECK-NEXT: br i1 [[TMP1]], label [[FIRST:%.*]], label [[NEXT:%.*]] +; CHECK: first: +; CHECK-NEXT: ret void +; CHECK: next: +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[C_RELOAD]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[E_RELOAD]], 1 +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define internal i1 @outlined_ir_func_0( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]] +; CHECK: entry_to_outline: +; CHECK-NEXT: [[C:%.*]] = load i32, i32* [[TMP0:%.*]], align 4 +; CHECK-NEXT: br label [[TEST1:%.*]] +; CHECK: test1: +; CHECK-NEXT: [[E:%.*]] = load i32, i32* [[TMP0]], align 4 +; CHECK-NEXT: br i1 true, label [[FIRST_SPLIT:%.*]], label [[TEST:%.*]] +; CHECK: test: +; CHECK-NEXT: [[D:%.*]] = load i32, i32* [[TMP0]], align 4 +; CHECK-NEXT: br i1 true, label [[FIRST_SPLIT]], label [[NEXT_EXITSTUB:%.*]] +; CHECK: first.split: +; CHECK-NEXT: [[DOTCE:%.*]] = phi i32 [ [[C]], [[TEST]] ], [ [[E]], [[TEST1]] ] +; CHECK-NEXT: br label [[FIRST_EXITSTUB:%.*]] +; CHECK: first.exitStub: +; CHECK-NEXT: switch i32 [[TMP3:%.*]], label [[FINAL_BLOCK_1:%.*]] [ +; CHECK-NEXT: i32 0, label [[OUTPUT_BLOCK_0_1:%.*]] +; CHECK-NEXT: i32 1, label [[OUTPUT_BLOCK_1_1:%.*]] +; CHECK-NEXT: ] +; CHECK: next.exitStub: +; CHECK-NEXT: switch i32 [[TMP3]], label [[FINAL_BLOCK_0:%.*]] [ +; CHECK-NEXT: i32 0, label [[OUTPUT_BLOCK_1_0:%.*]] +; CHECK-NEXT: ] +; CHECK: output_block_0_1: +; CHECK-NEXT: store i32 [[DOTCE]], i32* [[TMP1:%.*]], align 4 +; CHECK-NEXT: br label [[FINAL_BLOCK_1]] +; CHECK: output_block_1_0: +; CHECK-NEXT: store i32 
[[C]], i32* [[TMP1]], align 4 +; CHECK-NEXT: store i32 [[E]], i32* [[TMP2:%.*]], align 4 +; CHECK-NEXT: br label [[FINAL_BLOCK_0]] +; CHECK: output_block_1_1: +; CHECK-NEXT: store i32 [[C]], i32* [[TMP1]], align 4 +; CHECK-NEXT: store i32 [[E]], i32* [[TMP2]], align 4 +; CHECK-NEXT: br label [[FINAL_BLOCK_1]] +; CHECK: final_block_0: +; CHECK-NEXT: ret i1 false +; CHECK: final_block_1: +; CHECK-NEXT: ret i1 true +; diff --git a/llvm/test/Transforms/IROutliner/illegal-returns-twice.ll b/llvm/test/Transforms/IROutliner/illegal-returns-twice.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/IROutliner/illegal-returns-twice.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; This test checks that we do not outline functions that are marked as returns +; twice, since these can alter the frame of the function and affect how the +; outliner behaves, causing miscompiles. 
+ +; Function Attrs: optsize returns_twice +declare i32 @setjmp(i32*) local_unnamed_addr #1 +@tmp_jmpb = global [37 x i32] zeroinitializer, align 16 + +define void @function1() { +; CHECK-LABEL: @function1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 2, i32* [[A]], align 4 +; CHECK-NEXT: store i32 3, i32* [[B]], align 4 +; CHECK-NEXT: store i32 4, i32* [[C]], align 4 +; CHECK-NEXT: [[CALL:%.*]] = call i32 @setjmp(i32* getelementptr inbounds ([37 x i32], [37 x i32]* @tmp_jmpb, i64 0, i64 0)) +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + %call = call i32 @setjmp(i32* getelementptr inbounds ([37 x i32], [37 x i32]* @tmp_jmpb, i64 0, i64 0)) + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + ret void +} + +define void @function2() { +; CHECK-LABEL: @function2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 2, i32* [[A]], align 4 +; CHECK-NEXT: store i32 3, i32* [[B]], align 4 +; CHECK-NEXT: store i32 4, i32* [[C]], align 4 +; CHECK-NEXT: [[CALL:%.*]] = call i32 @setjmp(i32* getelementptr inbounds ([37 x i32], [37 x i32]* @tmp_jmpb, i64 0, i64 0)) +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + %call = call i32 @setjmp(i32* getelementptr inbounds ([37 x i32], [37 x i32]* 
@tmp_jmpb, i64 0, i64 0)) + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + ret void +} + +attributes #1 = { optsize returns_twice } diff --git a/llvm/test/Transforms/IROutliner/mismatched-phi-exits-not-in-first-outlined.ll b/llvm/test/Transforms/IROutliner/mismatched-phi-exits-not-in-first-outlined.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/IROutliner/mismatched-phi-exits-not-in-first-outlined.ll @@ -0,0 +1,85 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; Show that we are able to extract blocks that contain PHINodes, and selectively +; store into it's respective block, creating a new block if needed. + +define void @function1(i32* %a, i32* %b) { +entry: + %0 = alloca i32, align 4 + %c = load i32, i32* %0, align 4 + br label %test1 +test1: + %e = load i32, i32* %0, align 4 + br label %first +test: + %d = load i32, i32* %0, align 4 + br label %first +first: + ret void +} + +define void @function2(i32* %a, i32* %b) { +entry: + %0 = alloca i32, align 4 + %c = load i32, i32* %0, align 4 + br label %test1 +test1: + %e = load i32, i32* %0, align 4 + br label %first +test: + %d = load i32, i32* %0, align 4 + br label %first +first: + %1 = phi i32 [ %c, %test ], [ %e, %test1 ] + ret void +} +; CHECK-LABEL: @function1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[TMP0]], i32* null, i32 -1) +; CHECK-NEXT: br label [[FIRST:%.*]] +; CHECK: first: +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: @function2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[DOTCE_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[DOTCE_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: call void 
@outlined_ir_func_0(i32* [[TMP0]], i32* [[DOTCE_LOC]], i32 0) +; CHECK-NEXT: [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: br label [[FIRST:%.*]] +; CHECK: first: +; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ] +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define internal void @outlined_ir_func_0( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]] +; CHECK: entry_to_outline: +; CHECK-NEXT: [[C:%.*]] = load i32, i32* [[TMP0:%.*]], align 4 +; CHECK-NEXT: br label [[TEST1:%.*]] +; CHECK: test1: +; CHECK-NEXT: [[E:%.*]] = load i32, i32* [[TMP0]], align 4 +; CHECK-NEXT: br label [[PHI_BLOCK:%.*]] +; CHECK: test: +; CHECK-NEXT: [[D:%.*]] = load i32, i32* [[TMP0]], align 4 +; CHECK-NEXT: br label [[PHI_BLOCK]] +; CHECK: first.exitStub: +; CHECK-NEXT: switch i32 [[TMP2:%.*]], label [[FINAL_BLOCK_0:%.*]] [ +; CHECK-NEXT: i32 0, label [[OUTPUT_BLOCK_1_0:%.*]] +; CHECK-NEXT: ] +; CHECK: output_block_1_0: +; CHECK-NEXT: store i32 [[TMP3:%.*]], i32* [[TMP1:%.*]], align 4 +; CHECK-NEXT: br label [[FINAL_BLOCK_0]] +; CHECK: phi_block: +; CHECK-NEXT: [[TMP3]] = phi i32 [ [[C]], [[TEST:%.*]] ], [ [[E]], [[TEST1]] ] +; CHECK-NEXT: br label [[FIRST_EXITSTUB:%.*]] +; CHECK: final_block_0: +; CHECK-NEXT: ret void +; diff --git a/llvm/test/Transforms/IROutliner/mismatched-phi-exits.ll b/llvm/test/Transforms/IROutliner/mismatched-phi-exits.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/IROutliner/mismatched-phi-exits.ll @@ -0,0 +1,85 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; Show that we are able to extract blocks that contain PHINodes, and selectively +; store into it's respective block, only using if needed. 
+ +define void @function1(i32* %a, i32* %b) { +entry: + %0 = alloca i32, align 4 + %c = load i32, i32* %0, align 4 + br label %test1 +test1: + %e = load i32, i32* %0, align 4 + br label %first +test: + %d = load i32, i32* %0, align 4 + br label %first +first: + %1 = phi i32 [ %c, %test ], [ %e, %test1 ] + ret void +} + +define void @function2(i32* %a, i32* %b) { +entry: + %0 = alloca i32, align 4 + %c = load i32, i32* %0, align 4 + br label %test1 +test1: + %e = load i32, i32* %0, align 4 + br label %first +test: + %d = load i32, i32* %0, align 4 + br label %first +first: + ret void +} +; CHECK-LABEL: @function1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[DOTCE_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[DOTCE_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[TMP0]], i32* [[DOTCE_LOC]], i32 0) +; CHECK-NEXT: [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: br label [[FIRST:%.*]] +; CHECK: first: +; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ] +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: @function2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[TMP0]], i32* null, i32 -1) +; CHECK-NEXT: br label [[FIRST:%.*]] +; CHECK: first: +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define internal void @outlined_ir_func_0( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]] +; CHECK: entry_to_outline: +; CHECK-NEXT: [[C:%.*]] = load i32, i32* [[TMP0:%.*]], align 4 +; CHECK-NEXT: br label [[TEST1:%.*]] +; CHECK: test1: +; CHECK-NEXT: [[E:%.*]] = load i32, i32* [[TMP0]], align 4 +; CHECK-NEXT: br label [[FIRST_SPLIT:%.*]] +; CHECK: test: +; CHECK-NEXT: [[D:%.*]] = load i32, i32* 
[[TMP0]], align 4 +; CHECK-NEXT: br label [[FIRST_SPLIT]] +; CHECK: first.split: +; CHECK-NEXT: [[DOTCE:%.*]] = phi i32 [ [[C]], [[TEST:%.*]] ], [ [[E]], [[TEST1]] ] +; CHECK-NEXT: br label [[FIRST_EXITSTUB:%.*]] +; CHECK: first.exitStub: +; CHECK-NEXT: switch i32 [[TMP2:%.*]], label [[FINAL_BLOCK_0:%.*]] [ +; CHECK-NEXT: i32 0, label [[OUTPUT_BLOCK_0_0:%.*]] +; CHECK-NEXT: ] +; CHECK: output_block_0_0: +; CHECK-NEXT: store i32 [[DOTCE]], i32* [[TMP1:%.*]], align 4 +; CHECK-NEXT: br label [[FINAL_BLOCK_0]] +; CHECK: final_block_0: +; CHECK-NEXT: ret void +; diff --git a/llvm/test/Transforms/IROutliner/mismatched-phi-outputs-ordering.ll b/llvm/test/Transforms/IROutliner/mismatched-phi-outputs-ordering.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/IROutliner/mismatched-phi-outputs-ordering.ll @@ -0,0 +1,150 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; Show that we do not extract similar regions that would involve the splitting +; of phi nodes on exit. 
+ +define void @function1(i32* %a, i32* %b) { +entry: + %0 = alloca i32, align 4 + %c = load i32, i32* %0, align 4 + br label %test1 +test1: + %e = load i32, i32* %0, align 4 + br i1 true, label %first, label %test +test: + %d = load i32, i32* %0, align 4 + br i1 true, label %first, label %next +first: + %1 = phi i32 [ %c, %test ], [ %e, %test1 ] + ret void +next: + %2 = add i32 %d, 1 + %3 = add i32 %e, 1 + ret void +} + +define void @function2(i32* %a, i32* %b) { +entry: + %0 = alloca i32, align 4 + %c = load i32, i32* %0, align 4 + br label %test1 +test1: + %e = load i32, i32* %0, align 4 + br i1 true, label %first, label %test +test: + %d = load i32, i32* %0, align 4 + br i1 true, label %first, label %next +first: + ret void +next: + %1 = add i32 %d, 1 + %2 = add i32 %e, 1 + ret void +} +; CHECK-LABEL: @function1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[DOTCE_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[D_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[E_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[E_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[D_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]]) +; CHECK-NEXT: [[LT_CAST2:%.*]] = bitcast i32* [[DOTCE_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST2]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32* [[E_LOC]], i32* [[D_LOC]], i32* [[DOTCE_LOC]], i32 0) +; CHECK-NEXT: [[E_RELOAD:%.*]] = load i32, i32* [[E_LOC]], align 4 +; CHECK-NEXT: [[D_RELOAD:%.*]] = load i32, i32* [[D_LOC]], align 4 +; CHECK-NEXT: [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]]) +; CHECK-NEXT: call 
void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST2]]) +; CHECK-NEXT: br i1 [[TMP1]], label [[FIRST:%.*]], label [[NEXT:%.*]] +; CHECK: first: +; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ] +; CHECK-NEXT: ret void +; CHECK: next: +; CHECK-NEXT: call void @outlined_ir_func_1(i32 [[D_RELOAD]], i32 [[E_RELOAD]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: @function2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[D_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[E_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[E_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[D_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32* [[E_LOC]], i32* [[D_LOC]], i32* null, i32 1) +; CHECK-NEXT: [[E_RELOAD:%.*]] = load i32, i32* [[E_LOC]], align 4 +; CHECK-NEXT: [[D_RELOAD:%.*]] = load i32, i32* [[D_LOC]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]]) +; CHECK-NEXT: br i1 [[TMP1]], label [[FIRST:%.*]], label [[NEXT:%.*]] +; CHECK: first: +; CHECK-NEXT: ret void +; CHECK: next: +; CHECK-NEXT: call void @outlined_ir_func_1(i32 [[D_RELOAD]], i32 [[E_RELOAD]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define internal i1 @outlined_ir_func_0( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]] +; CHECK: entry_to_outline: +; CHECK-NEXT: [[C:%.*]] = load i32, i32* [[TMP0:%.*]], align 4 +; CHECK-NEXT: br label [[TEST1:%.*]] +; CHECK: test1: +; CHECK-NEXT: [[E:%.*]] = load i32, i32* [[TMP0]], align 4 +; CHECK-NEXT: br i1 true, label [[FIRST_SPLIT:%.*]], label [[TEST:%.*]] +; CHECK: test: +; CHECK-NEXT: [[D:%.*]] = load i32, i32* [[TMP0]], align 4 +; 
CHECK-NEXT: br i1 true, label [[FIRST_SPLIT]], label [[NEXT_EXITSTUB:%.*]] +; CHECK: first.split: +; CHECK-NEXT: [[DOTCE:%.*]] = phi i32 [ [[C]], [[TEST]] ], [ [[E]], [[TEST1]] ] +; CHECK-NEXT: br label [[FIRST_EXITSTUB:%.*]] +; CHECK: first.exitStub: +; CHECK-NEXT: switch i32 [[TMP4:%.*]], label [[FINAL_BLOCK_1:%.*]] [ +; CHECK-NEXT: i32 0, label [[OUTPUT_BLOCK_0_1:%.*]] +; CHECK-NEXT: i32 1, label [[OUTPUT_BLOCK_1_1:%.*]] +; CHECK-NEXT: ] +; CHECK: next.exitStub: +; CHECK-NEXT: switch i32 [[TMP4]], label [[FINAL_BLOCK_0:%.*]] [ +; CHECK-NEXT: i32 0, label [[OUTPUT_BLOCK_0_0:%.*]] +; CHECK-NEXT: i32 1, label [[OUTPUT_BLOCK_1_0:%.*]] +; CHECK-NEXT: ] +; CHECK: output_block_0_0: +; CHECK-NEXT: store i32 [[E]], i32* [[TMP1:%.*]], align 4 +; CHECK-NEXT: store i32 [[D]], i32* [[TMP2:%.*]], align 4 +; CHECK-NEXT: br label [[FINAL_BLOCK_0]] +; CHECK: output_block_0_1: +; CHECK-NEXT: store i32 [[E]], i32* [[TMP1]], align 4 +; CHECK-NEXT: store i32 [[DOTCE]], i32* [[TMP3:%.*]], align 4 +; CHECK-NEXT: br label [[FINAL_BLOCK_1]] +; CHECK: output_block_1_0: +; CHECK-NEXT: store i32 [[E]], i32* [[TMP1]], align 4 +; CHECK-NEXT: store i32 [[D]], i32* [[TMP2]], align 4 +; CHECK-NEXT: br label [[FINAL_BLOCK_0]] +; CHECK: output_block_1_1: +; CHECK-NEXT: store i32 [[E]], i32* [[TMP1]], align 4 +; CHECK-NEXT: br label [[FINAL_BLOCK_1]] +; CHECK: final_block_0: +; CHECK-NEXT: ret i1 false +; CHECK: final_block_1: +; CHECK-NEXT: ret i1 true +; +; +; CHECK-LABEL: @outlined_ir_func_1( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[NEXT_TO_OUTLINE:%.*]] +; CHECK: next_to_outline: +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0:%.*]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP1:%.*]], 1 +; CHECK-NEXT: br label [[NEXT_AFTER_OUTLINE_EXITSTUB:%.*]] +; CHECK: next_after_outline.exitStub: +; CHECK-NEXT: ret void +; diff --git a/llvm/test/Transforms/IROutliner/outlining-branches-phi-nodes.ll b/llvm/test/Transforms/IROutliner/outlining-branches-phi-nodes.ll new file mode 100644 --- 
/dev/null +++ b/llvm/test/Transforms/IROutliner/outlining-branches-phi-nodes.ll @@ -0,0 +1,165 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; Here we have multiple exits, but the different sources, same outputs are +; needed, this checks that they are compressed, and moved into the appropriate +; output blocks. + +define void @outline_outputs1() #0 { +entry: + %output = alloca i32, align 4 + %result = alloca i32, align 4 + %output2 = alloca i32, align 4 + %result2 = alloca i32, align 4 + %a = alloca i32, align 4 + %b = alloca i32, align 4 + br label %block_2 +block_1: + %a2 = alloca i32, align 4 + %b2 = alloca i32, align 4 + br label %block_2 +block_2: + %a2val = load i32, i32* %a + %b2val = load i32, i32* %b + %add2 = add i32 2, %a2val + %mul2 = mul i32 2, %b2val + br label %block_5 +block_3: + %aval = load i32, i32* %a + %bval = load i32, i32* %b + %add = add i32 2, %aval + %mul = mul i32 2, %bval + br label %block_4 +block_4: + store i32 %add, i32* %output, align 4 + store i32 %mul, i32* %result, align 4 + br label %block_6 +block_5: + store i32 %add2, i32* %output, align 4 + store i32 %mul2, i32* %result, align 4 + br label %block_6 +block_6: + %diff = phi i32 [%aval, %block_4], [%a2val, %block_5] + ret void +} + +define void @outline_outputs2() #0 { +entry: + %output = alloca i32, align 4 + %result = alloca i32, align 4 + %output2 = alloca i32, align 4 + %result2 = alloca i32, align 4 + %a = alloca i32, align 4 + %b = alloca i32, align 4 + br label %block_2 +block_1: + %a2 = alloca i32, align 4 + %b2 = alloca i32, align 4 + br label %block_2 +block_2: + %a2val = load i32, i32* %a + %b2val = load i32, i32* %b + %add2 = add i32 2, %a2val + %mul2 = mul i32 2, %b2val + br label %block_5 +block_3: + %aval = load i32, i32* %a + %bval = load i32, i32* %b + %add = add i32 2, %aval + %mul = mul i32 2, %bval + br label 
%block_4 +block_4: + store i32 %add, i32* %output, align 4 + store i32 %mul, i32* %result, align 4 + br label %block_6 +block_5: + store i32 %add2, i32* %output, align 4 + store i32 %mul2, i32* %result, align 4 + br label %block_6 +block_6: + %diff = phi i32 [%aval, %block_4], [%a2val, %block_5] + ret void +} + +; CHECK-LABEL: @outline_outputs1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[DIFF_CE_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[OUTPUT2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label [[BLOCK_2:%.*]] +; CHECK: block_1: +; CHECK-NEXT: [[A2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label [[BLOCK_2]] +; CHECK: block_2: +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[DIFF_CE_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[RESULT]], i32* [[DIFF_CE_LOC]]) +; CHECK-NEXT: [[DIFF_CE_RELOAD:%.*]] = load i32, i32* [[DIFF_CE_LOC]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: br label [[BLOCK_6:%.*]] +; CHECK: block_6: +; CHECK-NEXT: [[DIFF:%.*]] = phi i32 [ [[DIFF_CE_RELOAD]], [[BLOCK_2]] ] +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: @outline_outputs2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[DIFF_CE_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[OUTPUT2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label [[BLOCK_2:%.*]] +; CHECK: 
block_1: +; CHECK-NEXT: [[A2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label [[BLOCK_2]] +; CHECK: block_2: +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[DIFF_CE_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[RESULT]], i32* [[DIFF_CE_LOC]]) +; CHECK-NEXT: [[DIFF_CE_RELOAD:%.*]] = load i32, i32* [[DIFF_CE_LOC]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: br label [[BLOCK_6:%.*]] +; CHECK: block_6: +; CHECK-NEXT: [[DIFF:%.*]] = phi i32 [ [[DIFF_CE_RELOAD]], [[BLOCK_2]] ] +; CHECK-NEXT: ret void +; +; +; CHECK: define internal void @outlined_ir_func_0( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[BLOCK_2_TO_OUTLINE:%.*]] +; CHECK: block_2_to_outline: +; CHECK-NEXT: [[A2VAL:%.*]] = load i32, i32* [[TMP0:%.*]], align 4 +; CHECK-NEXT: [[B2VAL:%.*]] = load i32, i32* [[TMP1:%.*]], align 4 +; CHECK-NEXT: [[ADD2:%.*]] = add i32 2, [[A2VAL]] +; CHECK-NEXT: [[MUL2:%.*]] = mul i32 2, [[B2VAL]] +; CHECK-NEXT: br label [[BLOCK_5:%.*]] +; CHECK: block_3: +; CHECK-NEXT: [[AVAL:%.*]] = load i32, i32* [[TMP0]], align 4 +; CHECK-NEXT: [[BVAL:%.*]] = load i32, i32* [[TMP1]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add i32 2, [[AVAL]] +; CHECK-NEXT: [[MUL:%.*]] = mul i32 2, [[BVAL]] +; CHECK-NEXT: br label [[BLOCK_4:%.*]] +; CHECK: block_4: +; CHECK-NEXT: store i32 [[ADD]], i32* [[TMP2:%.*]], align 4 +; CHECK-NEXT: store i32 [[MUL]], i32* [[TMP3:%.*]], align 4 +; CHECK-NEXT: br label [[BLOCK_6_SPLIT:%.*]] +; CHECK: block_5: +; CHECK-NEXT: store i32 [[ADD2]], i32* [[TMP2]], align 4 +; CHECK-NEXT: store i32 [[MUL2]], i32* [[TMP3]], align 4 +; CHECK-NEXT: br label [[BLOCK_6_SPLIT]] +; CHECK: block_6.split: +; CHECK-NEXT: [[DIFF_CE:%.*]] = phi i32 [ [[AVAL]], [[BLOCK_4]] ], [ [[A2VAL]], [[BLOCK_5]] ] +; CHECK-NEXT: br label 
[[BLOCK_6_EXITSTUB:%.*]] +; CHECK: block_6.exitStub: +; CHECK-NEXT: store i32 [[DIFF_CE]], i32* [[TMP4:%.*]], align 4 +; CHECK-NEXT: ret void +; diff --git a/llvm/test/Transforms/IROutliner/outlining-exits-to-phi-node.ll b/llvm/test/Transforms/IROutliner/outlining-exits-to-phi-node.ll --- a/llvm/test/Transforms/IROutliner/outlining-exits-to-phi-node.ll +++ b/llvm/test/Transforms/IROutliner/outlining-exits-to-phi-node.ll @@ -37,42 +37,50 @@ } ; CHECK-LABEL: @function1( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[DOTCE_LOC:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TMP0:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[C:%.*]] = load i32, i32* [[TMP0]], align 4 -; CHECK-NEXT: br label [[TEST1:%.*]] -; CHECK: test1: -; CHECK-NEXT: [[E:%.*]] = load i32, i32* [[TMP0]], align 4 +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[DOTCE_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[TMP0]], i32* [[DOTCE_LOC]]) +; CHECK-NEXT: [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) ; CHECK-NEXT: br label [[FIRST:%.*]] -; CHECK: test: -; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[TMP0]]) -; CHECK-NEXT: br label [[FIRST]] ; CHECK: first: -; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[C]], [[TEST:%.*]] ], [ [[E]], [[TEST1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ] ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: @function2( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[DOTCE_LOC:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TMP0:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[C:%.*]] = load i32, i32* [[TMP0]], align 4 -; CHECK-NEXT: br label [[TEST1:%.*]] -; CHECK: test1: -; CHECK-NEXT: [[E:%.*]] = load i32, i32* [[TMP0]], align 4 +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[DOTCE_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: call void 
@outlined_ir_func_0(i32* [[TMP0]], i32* [[DOTCE_LOC]]) +; CHECK-NEXT: [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) ; CHECK-NEXT: br label [[FIRST:%.*]] -; CHECK: test: -; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[TMP0]]) -; CHECK-NEXT: br label [[FIRST]] ; CHECK: first: -; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[C]], [[TEST:%.*]] ], [ [[E]], [[TEST1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ] ; CHECK-NEXT: ret void ; ; -; CHECK: define internal void @outlined_ir_func_0( +; CHECK-LABEL: define internal void @outlined_ir_func_0( ; CHECK-NEXT: newFuncRoot: -; CHECK-NEXT: br label [[TEST_TO_OUTLINE:%.*]] -; CHECK: test_to_outline: -; CHECK-NEXT: [[D:%.*]] = load i32, i32* [[TMP0:%.*]], align 4 +; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]] +; CHECK: entry_to_outline: +; CHECK-NEXT: [[C:%.*]] = load i32, i32* [[TMP0:%.*]], align 4 +; CHECK-NEXT: br label [[TEST1:%.*]] +; CHECK: test1: +; CHECK-NEXT: [[E:%.*]] = load i32, i32* [[TMP0]], align 4 +; CHECK-NEXT: br label [[FIRST_SPLIT:%.*]] +; CHECK: test: +; CHECK-NEXT: [[D:%.*]] = load i32, i32* [[TMP0]], align 4 +; CHECK-NEXT: br label [[FIRST_SPLIT]] +; CHECK: first.split: +; CHECK-NEXT: [[DOTCE:%.*]] = phi i32 [ [[C]], [[TEST:%.*]] ], [ [[E]], [[TEST1]] ] ; CHECK-NEXT: br label [[FIRST_EXITSTUB:%.*]] ; CHECK: first.exitStub: +; CHECK-NEXT: store i32 [[DOTCE]], i32* [[TMP1:%.*]], align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/IROutliner/phi-nodes-output-overload.ll b/llvm/test/Transforms/IROutliner/phi-nodes-output-overload.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/IROutliner/phi-nodes-output-overload.ll @@ -0,0 +1,112 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; Show that we do not extract similar 
regions that would involve the splitting +; of phi nodes on exit. + +define void @function1(i32* %a, i32* %b) { +entry: + %0 = alloca i32, align 4 + %c = load i32, i32* %0, align 4 + br label %test1 +test1: + %e = load i32, i32* %0, align 4 + br i1 true, label %first, label %next +test: + %d = load i32, i32* %0, align 4 + br i1 true, label %first, label %next +first: + %1 = phi i32 [ %c, %test ], [ %e, %test1 ] + ret void +next: + ret void +} + +define void @function2(i32* %a, i32* %b) { +entry: + %0 = alloca i32, align 4 + %c = load i32, i32* %0, align 4 + br label %test1 +test1: + %e = load i32, i32* %0, align 4 + br i1 true, label %first, label %next +test: + %d = load i32, i32* %0, align 4 + br i1 true, label %first, label %next +first: + ret void +next: + %1 = phi i32 [ %c, %test ], [ %e, %test1 ] + ret void +} +; CHECK-LABEL: @function1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[DOTCE_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[DOTCE_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32* [[DOTCE_LOC]], i32 0) +; CHECK-NEXT: [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: br i1 [[TMP1]], label [[FIRST:%.*]], label [[NEXT:%.*]] +; CHECK: first: +; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ] +; CHECK-NEXT: ret void +; CHECK: next: +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: @function2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[DOTCE_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[DOTCE_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32* [[DOTCE_LOC]], 
i32 1) +; CHECK-NEXT: [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: br i1 [[TMP1]], label [[FIRST:%.*]], label [[NEXT:%.*]] +; CHECK: first: +; CHECK-NEXT: ret void +; CHECK: next: +; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ] +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define internal i1 @outlined_ir_func_0( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]] +; CHECK: entry_to_outline: +; CHECK-NEXT: [[C:%.*]] = load i32, i32* [[TMP0:%.*]], align 4 +; CHECK-NEXT: br label [[TEST1:%.*]] +; CHECK: test1: +; CHECK-NEXT: [[E:%.*]] = load i32, i32* [[TMP0]], align 4 +; CHECK-NEXT: br i1 true, label [[FIRST_SPLIT:%.*]], label [[PHI_BLOCK:%.*]] +; CHECK: test: +; CHECK-NEXT: [[D:%.*]] = load i32, i32* [[TMP0]], align 4 +; CHECK-NEXT: br i1 true, label [[FIRST_SPLIT]], label [[PHI_BLOCK]] +; CHECK: first.split: +; CHECK-NEXT: [[DOTCE:%.*]] = phi i32 [ [[C]], [[TEST:%.*]] ], [ [[E]], [[TEST1]] ] +; CHECK-NEXT: br label [[FIRST_EXITSTUB:%.*]] +; CHECK: first.exitStub: +; CHECK-NEXT: switch i32 [[TMP2:%.*]], label [[FINAL_BLOCK_1:%.*]] [ +; CHECK-NEXT: i32 0, label [[OUTPUT_BLOCK_0_1:%.*]] +; CHECK-NEXT: ] +; CHECK: next.exitStub: +; CHECK-NEXT: switch i32 [[TMP2]], label [[FINAL_BLOCK_0:%.*]] [ +; CHECK-NEXT: i32 0, label [[OUTPUT_BLOCK_1_0:%.*]] +; CHECK-NEXT: ] +; CHECK: output_block_0_1: +; CHECK-NEXT: store i32 [[DOTCE]], i32* [[TMP1:%.*]], align 4 +; CHECK-NEXT: br label [[FINAL_BLOCK_1]] +; CHECK: output_block_1_0: +; CHECK-NEXT: store i32 [[TMP3:%.*]], i32* [[TMP1]], align 4 +; CHECK-NEXT: br label [[FINAL_BLOCK_0]] +; CHECK: phi_block: +; CHECK-NEXT: [[TMP3]] = phi i32 [ [[C]], [[TEST]] ], [ [[E]], [[TEST1]] ] +; CHECK-NEXT: br label [[NEXT_EXITSTUB:%.*]] +; CHECK: final_block_0: +; CHECK-NEXT: ret i1 false +; CHECK: final_block_1: +; CHECK-NEXT: ret i1 true +; diff --git 
a/llvm/test/Transforms/IROutliner/region-inputs-in-phi-nodes.ll b/llvm/test/Transforms/IROutliner/region-inputs-in-phi-nodes.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/IROutliner/region-inputs-in-phi-nodes.ll @@ -0,0 +1,104 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; Show that we are able to propogate inputs to the region into the split PHINode +; outside of the region if necessary. + +define void @function1(i32* %a, i32* %b) { +entry: + %0 = alloca i32, align 4 + %c = load i32, i32* %0, align 4 + %z = add i32 %c, %c + br i1 true, label %test1, label %first +test1: + %e = load i32, i32* %0, align 4 + %1 = add i32 %c, %c + br i1 true, label %first, label %test +test: + %d = load i32, i32* %0, align 4 + br i1 true, label %first, label %next +first: + %2 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ] + ret void +next: + ret void +} + +define void @function2(i32* %a, i32* %b) { +entry: + %0 = alloca i32, align 4 + %c = load i32, i32* %0, align 4 + %z = mul i32 %c, %c + br i1 true, label %test1, label %first +test1: + %e = load i32, i32* %0, align 4 + %1 = add i32 %c, %c + br i1 true, label %first, label %test +test: + %d = load i32, i32* %0, align 4 + br i1 true, label %first, label %next +first: + %2 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ] + ret void +next: + ret void +} +; CHECK-LABEL: @function1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[DOTCE_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = load i32, i32* [[TMP0]], align 4 +; CHECK-NEXT: [[Z:%.*]] = add i32 [[C]], [[C]] +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[DOTCE_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: [[TARGETBLOCK:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32 [[C]], i32* 
[[DOTCE_LOC]]) +; CHECK-NEXT: [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: br i1 [[TARGETBLOCK]], label [[FIRST:%.*]], label [[NEXT:%.*]] +; CHECK: first: +; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ] +; CHECK-NEXT: ret void +; CHECK: next: +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: @function2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[DOTCE_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = load i32, i32* [[TMP0]], align 4 +; CHECK-NEXT: [[Z:%.*]] = mul i32 [[C]], [[C]] +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[DOTCE_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: [[TARGETBLOCK:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32 [[C]], i32* [[DOTCE_LOC]]) +; CHECK-NEXT: [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: br i1 [[TARGETBLOCK]], label [[FIRST:%.*]], label [[NEXT:%.*]] +; CHECK: first: +; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ] +; CHECK-NEXT: ret void +; CHECK: next: +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define internal i1 @outlined_ir_func_0( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]] +; CHECK: entry_to_outline: +; CHECK-NEXT: br i1 true, label [[TEST1:%.*]], label [[FIRST_SPLIT:%.*]] +; CHECK: test1: +; CHECK-NEXT: [[E:%.*]] = load i32, i32* [[TMP0:%.*]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP1:%.*]], [[TMP1]] +; CHECK-NEXT: br i1 true, label [[FIRST_SPLIT]], label [[TEST:%.*]] +; CHECK: test: +; CHECK-NEXT: [[D:%.*]] = load i32, i32* [[TMP0]], align 4 +; CHECK-NEXT: br i1 true, label [[FIRST_SPLIT]], label [[NEXT_EXITSTUB:%.*]] +; CHECK: first.split: +; CHECK-NEXT: [[DOTCE:%.*]] = phi i32 [ [[D]], [[TEST]] ], [ 
[[E]], [[TEST1]] ], [ [[TMP1]], [[ENTRY_TO_OUTLINE]] ] +; CHECK-NEXT: br label [[FIRST_EXITSTUB:%.*]] +; CHECK: first.exitStub: +; CHECK-NEXT: store i32 [[DOTCE]], i32* [[TMP2:%.*]], align 4 +; CHECK-NEXT: ret i1 true +; CHECK: next.exitStub: +; CHECK-NEXT: ret i1 false +; diff --git a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp @@ -994,7 +994,7 @@ LogicalResult matchAndRewrite(GenericOp genericOp, PatternRewriter &rewriter) const override { // Only apply to elementwise linalg on tensor. - if (!genericOp.hasTensorSemantics() || + if (!genericOp.hasTensorSemantics() || genericOp.hasIndexSemantics() || genericOp.getNumParallelLoops() != genericOp.getNumLoops()) return failure(); // Only support identity output maps. It could be extended to permuations if diff --git a/mlir/test/Dialect/Linalg/fusion-push-reshape.mlir b/mlir/test/Dialect/Linalg/fusion-push-reshape.mlir --- a/mlir/test/Dialect/Linalg/fusion-push-reshape.mlir +++ b/mlir/test/Dialect/Linalg/fusion-push-reshape.mlir @@ -124,3 +124,30 @@ // CHECK-SAME: outs(%{{.+}} : tensor<6x5xf32>) // CHECK: tensor.expand_shape %[[OP]] // CHECK-SAME: tensor<6x5xf32> into tensor<2x3x5xf32> + +// ----- + +func @generic_op_index_semantics(%A: tensor, %B: tensor<16xi64>, %init: tensor) -> tensor { + %0 = tensor.expand_shape %A [[0, 1], [2]] + : tensor into tensor + %2 = linalg.generic {indexing_maps = [ + affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d2)>, + affine_map<(d0, d1, d2) -> (d0, d1, d2)>], + iterator_types = ["parallel", "parallel", "parallel"]} + ins(%0, %B : tensor, tensor<16xi64>) + outs(%init : tensor) { + ^bb0(%arg1: i64, %arg2: i64, %arg3: i64): // no predecessors + %index = linalg.index 0 : index + %1 = arith.index_cast %index : index to i64 + %add = arith.addi %arg1, %1 : i64 + %s 
= arith.subi %add, %arg2 : i64 + linalg.yield %s : i64 + } -> tensor + return %2 : tensor +} +// CHECK: func @generic_op_index_semantics +// CHECK-SAME: %[[ARG0:.+]]: tensor +// CHECK: %[[RESHAPE:.+]] = tensor.expand_shape %[[ARG0]] +// CHECK: %[[RESULT:.+]] = linalg.generic +// CHECK-SAME: ins(%[[RESHAPE]] +// CHECK: return %[[RESULT]]