diff --git a/clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.cpp --- a/clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.cpp @@ -84,19 +84,19 @@ // positives if sizeof is applied on template argument. const auto IntegerExpr = ignoringParenImpCasts(integerLiteral()); - const auto ConstantExpr = expr(ignoringParenImpCasts( + const auto ConstantExpr = ignoringParenImpCasts( anyOf(integerLiteral(), unaryOperator(hasUnaryOperand(IntegerExpr)), - binaryOperator(hasLHS(IntegerExpr), hasRHS(IntegerExpr))))); - const auto IntegerCallExpr = expr(ignoringParenImpCasts( + binaryOperator(hasLHS(IntegerExpr), hasRHS(IntegerExpr)))); + const auto IntegerCallExpr = ignoringParenImpCasts( callExpr(anyOf(hasType(isInteger()), hasType(enumType())), - unless(isInTemplateInstantiation())))); + unless(isInTemplateInstantiation()))); const auto SizeOfExpr = expr(anyOf( sizeOfExpr( has(hasUnqualifiedDesugaredType(type().bind("sizeof-arg-type")))), sizeOfExpr(has(expr(hasType( hasUnqualifiedDesugaredType(type().bind("sizeof-arg-type")))))))); - const auto SizeOfZero = expr( - sizeOfExpr(has(ignoringParenImpCasts(expr(integerLiteral(equals(0))))))); + const auto SizeOfZero = + sizeOfExpr(has(ignoringParenImpCasts(integerLiteral(equals(0))))); // Detect expression like: sizeof(ARRAYLEN); // Note: The expression 'sizeof(sizeof(0))' is a portable trick used to know @@ -111,74 +111,69 @@ // Detect sizeof(f()) if (WarnOnSizeOfIntegerExpression) { - Finder->addMatcher( - expr(sizeOfExpr(ignoringParenImpCasts(has(IntegerCallExpr)))) - .bind("sizeof-integer-call"), - this); + Finder->addMatcher(sizeOfExpr(ignoringParenImpCasts(has(IntegerCallExpr))) + .bind("sizeof-integer-call"), + this); } // Detect expression like: sizeof(this); if (WarnOnSizeOfThis) { - Finder->addMatcher( - expr(sizeOfExpr(has(ignoringParenImpCasts(expr(cxxThisExpr()))))) 
- .bind("sizeof-this"), - this); + Finder->addMatcher(sizeOfExpr(has(ignoringParenImpCasts(cxxThisExpr()))) + .bind("sizeof-this"), + this); } // Detect sizeof(kPtr) where kPtr is 'const char* kPtr = "abc"'; const auto CharPtrType = pointerType(pointee(isAnyCharacter())); const auto ConstStrLiteralDecl = - varDecl(isDefinition(), hasType(qualType(hasCanonicalType(CharPtrType))), + varDecl(isDefinition(), hasType(hasCanonicalType(CharPtrType)), hasInitializer(ignoringParenImpCasts(stringLiteral()))); - Finder->addMatcher(expr(sizeOfExpr(has(ignoringParenImpCasts(expr( - hasType(qualType(hasCanonicalType(CharPtrType))), - ignoringParenImpCasts(declRefExpr( - hasDeclaration(ConstStrLiteralDecl)))))))) - .bind("sizeof-charp"), - this); + Finder->addMatcher( + sizeOfExpr(has(ignoringParenImpCasts( + expr(hasType(hasCanonicalType(CharPtrType)), + ignoringParenImpCasts(declRefExpr( + hasDeclaration(ConstStrLiteralDecl))))))) + .bind("sizeof-charp"), + this); // Detect sizeof(ptr) where ptr points to an aggregate (i.e. sizeof(&S)). // Do not find it if RHS of a 'sizeof(arr) / sizeof(arr[0])' expression. 
- const auto ArrayExpr = expr(ignoringParenImpCasts( - expr(hasType(qualType(hasCanonicalType(arrayType())))))); + const auto ArrayExpr = + ignoringParenImpCasts(hasType(hasCanonicalType(arrayType()))); const auto ArrayCastExpr = expr(anyOf( unaryOperator(hasUnaryOperand(ArrayExpr), unless(hasOperatorName("*"))), binaryOperator(hasEitherOperand(ArrayExpr)), castExpr(hasSourceExpression(ArrayExpr)))); - const auto PointerToArrayExpr = expr(ignoringParenImpCasts(expr( - hasType(qualType(hasCanonicalType(pointerType(pointee(arrayType())))))))); - - const auto StructAddrOfExpr = - unaryOperator(hasOperatorName("&"), - hasUnaryOperand(ignoringParenImpCasts(expr( - hasType(qualType(hasCanonicalType(recordType()))))))); - const auto PointerToStructType = type(hasUnqualifiedDesugaredType( - pointerType(pointee(recordType())))); - const auto PointerToStructExpr = expr(ignoringParenImpCasts(expr( - hasType(qualType(hasCanonicalType(PointerToStructType))), - unless(cxxThisExpr())))); - - const auto ArrayOfPointersExpr = expr(ignoringParenImpCasts(expr(hasType( - qualType(hasCanonicalType(arrayType(hasElementType(pointerType())) - .bind("type-of-array-of-pointers"))))))); + const auto PointerToArrayExpr = ignoringParenImpCasts( + hasType(hasCanonicalType(pointerType(pointee(arrayType()))))); + + const auto StructAddrOfExpr = unaryOperator( + hasOperatorName("&"), hasUnaryOperand(ignoringParenImpCasts( + hasType(hasCanonicalType(recordType()))))); + const auto PointerToStructType = + hasUnqualifiedDesugaredType(pointerType(pointee(recordType()))); + const auto PointerToStructExpr = ignoringParenImpCasts(expr( + hasType(hasCanonicalType(PointerToStructType)), unless(cxxThisExpr()))); + + const auto ArrayOfPointersExpr = ignoringParenImpCasts( + hasType(hasCanonicalType(arrayType(hasElementType(pointerType())) + .bind("type-of-array-of-pointers")))); const auto ArrayOfSamePointersExpr = - expr(ignoringParenImpCasts(expr(hasType(qualType(hasCanonicalType( - 
arrayType(equalsBoundNode("type-of-array-of-pointers")))))))); - const auto ZeroLiteral = - expr(ignoringParenImpCasts(integerLiteral(equals(0)))); + ignoringParenImpCasts(hasType(hasCanonicalType( + arrayType(equalsBoundNode("type-of-array-of-pointers"))))); + const auto ZeroLiteral = ignoringParenImpCasts(integerLiteral(equals(0))); const auto ArrayOfSamePointersZeroSubscriptExpr = - expr(ignoringParenImpCasts(arraySubscriptExpr( - hasBase(ArrayOfSamePointersExpr), hasIndex(ZeroLiteral)))); + ignoringParenImpCasts(arraySubscriptExpr(hasBase(ArrayOfSamePointersExpr), + hasIndex(ZeroLiteral))); const auto ArrayLengthExprDenom = - expr(hasParent(expr(ignoringParenImpCasts( - binaryOperator(hasOperatorName("/"), - hasLHS(expr(ignoringParenImpCasts(expr( - sizeOfExpr(has(ArrayOfPointersExpr)))))))))), + expr(hasParent(expr(ignoringParenImpCasts(binaryOperator( + hasOperatorName("/"), hasLHS(ignoringParenImpCasts(sizeOfExpr( + has(ArrayOfPointersExpr)))))))), sizeOfExpr(has(ArrayOfSamePointersZeroSubscriptExpr))); - Finder->addMatcher(expr(anyOf(sizeOfExpr(has(expr(ignoringParenImpCasts(anyOf( + Finder->addMatcher(expr(anyOf(sizeOfExpr(has(ignoringParenImpCasts(anyOf( ArrayCastExpr, PointerToArrayExpr, - StructAddrOfExpr, PointerToStructExpr))))), + StructAddrOfExpr, PointerToStructExpr)))), sizeOfExpr(has(PointerToStructType))), unless(ArrayLengthExprDenom)) .bind("sizeof-pointer-to-aggregate"), @@ -197,8 +192,8 @@ } // Detect expression like: sizeof(expr, expr); most likely an error. 
- Finder->addMatcher(expr(sizeOfExpr(has(expr(ignoringParenImpCasts( - binaryOperator(hasOperatorName(","))))))) + Finder->addMatcher(sizeOfExpr(has(ignoringParenImpCasts( + binaryOperator(hasOperatorName(","))))) .bind("sizeof-comma-expr"), this); @@ -212,9 +207,9 @@ const auto ElemType = arrayType(hasElementType(recordType().bind("elem-type"))); const auto ElemPtrType = pointerType(pointee(type().bind("elem-ptr-type"))); - const auto NumType = qualType(hasCanonicalType( - type(anyOf(ElemType, ElemPtrType, type())).bind("num-type"))); - const auto DenomType = qualType(hasCanonicalType(type().bind("denom-type"))); + const auto NumType = hasCanonicalType( + type(anyOf(ElemType, ElemPtrType, type())).bind("num-type")); + const auto DenomType = hasCanonicalType(type().bind("denom-type")); Finder->addMatcher( binaryOperator(hasOperatorName("/"), @@ -246,30 +241,29 @@ // Detect strange double-sizeof expression like: sizeof(sizeof(...)); // Note: The expression 'sizeof(sizeof(0))' is accepted. - Finder->addMatcher( - expr(sizeOfExpr(has(ignoringParenImpCasts(expr( - hasSizeOfDescendant(8, expr(SizeOfExpr, unless(SizeOfZero)))))))) - .bind("sizeof-sizeof-expr"), - this); + Finder->addMatcher(sizeOfExpr(has(ignoringParenImpCasts(hasSizeOfDescendant( + 8, allOf(SizeOfExpr, unless(SizeOfZero)))))) + .bind("sizeof-sizeof-expr"), + this); // Detect sizeof in pointer arithmetic like: N * sizeof(S) == P1 - P2 or // (P1 - P2) / sizeof(S) where P1 and P2 are pointers to type S. 
const auto PtrDiffExpr = binaryOperator( hasOperatorName("-"), - hasLHS(expr(hasType(hasUnqualifiedDesugaredType(pointerType(pointee( - hasUnqualifiedDesugaredType(type().bind("left-ptr-type")))))))), - hasRHS(expr(hasType(hasUnqualifiedDesugaredType(pointerType(pointee( - hasUnqualifiedDesugaredType(type().bind("right-ptr-type"))))))))); + hasLHS(hasType(hasUnqualifiedDesugaredType(pointerType(pointee( + hasUnqualifiedDesugaredType(type().bind("left-ptr-type"))))))), + hasRHS(hasType(hasUnqualifiedDesugaredType(pointerType(pointee( + hasUnqualifiedDesugaredType(type().bind("right-ptr-type")))))))); Finder->addMatcher( binaryOperator( hasAnyOperatorName("==", "!=", "<", "<=", ">", ">=", "+", "-"), - hasOperands(expr(anyOf(ignoringParenImpCasts(SizeOfExpr), - ignoringParenImpCasts(binaryOperator( - hasOperatorName("*"), - hasEitherOperand( - ignoringParenImpCasts(SizeOfExpr)))))), - ignoringParenImpCasts(PtrDiffExpr))) + hasOperands( + anyOf(ignoringParenImpCasts(SizeOfExpr), + ignoringParenImpCasts(binaryOperator( + hasOperatorName("*"), + hasEitherOperand(ignoringParenImpCasts(SizeOfExpr))))), + ignoringParenImpCasts(PtrDiffExpr))) .bind("sizeof-in-ptr-arithmetic-mul"), this); diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -426,6 +426,11 @@ /// according to the field declaring type width. CODEGENOPT(AAPCSBitfieldWidth, 1, 1) +/// Sets the IEEE bit in the expected default floating point mode register. 
+/// Floating point opcodes that support exception flag gathering quiet and +/// propagate signaling NaN inputs per IEEE 754-2008 (AMDGPU Only) +CODEGENOPT(EmitIEEENaNCompliantInsts, 1, 1) + #undef CODEGENOPT #undef ENUM_CODEGENOPT #undef VALUE_CODEGENOPT diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -129,6 +129,8 @@ "invalid -Xopenmp-target argument: '%0', options requiring arguments are unsupported">; def err_drv_argument_only_allowed_with : Error< "invalid argument '%0' only allowed with '%1'">; +def err_drv_amdgpu_ieee_without_no_honor_nans : Error< + "invalid argument '-mno-amdgpu-ieee' only allowed with relaxed NaN handling">; def err_drv_argument_not_allowed_with : Error< "invalid argument '%0' not allowed with '%1'">; def err_drv_invalid_version_number : Error< diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3177,6 +3177,14 @@ Values<"command,reactor">, HelpText<"Execution model (WebAssembly only)">; +defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee", + CodeGenOpts<"EmitIEEENaNCompliantInsts">, DefaultTrue, + PosFlag, + NegFlag>, Group; + def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group, HelpText<"Specify code object ABI version. Defaults to 3. 
(AMDGPU only)">, MetaVarName<"">, Values<"2,3,4">; diff --git a/clang/lib/AST/TemplateBase.cpp b/clang/lib/AST/TemplateBase.cpp --- a/clang/lib/AST/TemplateBase.cpp +++ b/clang/lib/AST/TemplateBase.cpp @@ -80,6 +80,26 @@ } } +static unsigned getArrayDepth(QualType type) { + unsigned count = 0; + while (const auto *arrayType = type->getAsArrayTypeUnsafe()) { + count++; + type = arrayType->getElementType(); + } + return count; +} + +static bool needsAmpersandOnTemplateArg(QualType paramType, QualType argType) { + // Generally, if the parameter type is a pointer, we must be taking the + // address of something and need a &. However, if the argument is an array, + // this could be implicit via array-to-pointer decay. + if (!paramType->isPointerType()) + return paramType->isMemberPointerType(); + if (argType->isArrayType()) + return getArrayDepth(argType) == getArrayDepth(paramType->getPointeeType()); + return true; +} + //===----------------------------------------------------------------------===// // TemplateArgument Implementation //===----------------------------------------------------------------------===// @@ -363,8 +383,10 @@ break; } } - if (!getParamTypeForDecl()->isReferenceType()) - Out << '&'; + if (auto *VD = dyn_cast(ND)) { + if (needsAmpersandOnTemplateArg(getParamTypeForDecl(), VD->getType())) + Out << "&"; + } ND->printQualifiedName(Out); break; } diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -9166,6 +9166,9 @@ if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics()) F->addFnAttr("amdgpu-unsafe-fp-atomics", "true"); + + if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts) + F->addFnAttr("amdgpu-ieee", "false"); } unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const { diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp --- a/clang/lib/Driver/ToolChains/Darwin.cpp 
+++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -2671,8 +2671,16 @@ VersionTuple SDKVersion = SDKInfo->getVersion().withoutBuild(); CmdArgs.push_back(Args.MakeArgString(SDKVersion.getAsString())); } else { - // Use a blank SDK version if it's not present. - CmdArgs.push_back("0.0.0"); + // Use an SDK version that's matching the deployment target if the SDK + // version is missing. This is preferred over an empty SDK version (0.0.0) + // as the system's runtime might expect the linked binary to contain a + // valid SDK version in order for the binary to work correctly. It's + // reasonable to use the deployment target version as a proxy for the + // SDK version because older SDKs don't guarantee support for deployment + // targets newer than the SDK versions, so that rules out using some + // predetermined older SDK version, which leaves the deployment target + // version as the only reasonable choice. + CmdArgs.push_back(Args.MakeArgString(TargetVersion.getAsString())); } } diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -114,6 +114,7 @@ TYPE(CSharpNamedArgumentColon) \ TYPE(CSharpNullable) \ TYPE(CSharpNullCoalescing) \ + TYPE(CSharpNullCoalescingAssignment) \ TYPE(CSharpNullConditional) \ TYPE(CSharpNullConditionalLSquare) \ TYPE(CSharpGenericTypeConstraint) \ diff --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h --- a/clang/lib/Format/FormatTokenLexer.h +++ b/clang/lib/Format/FormatTokenLexer.h @@ -54,7 +54,8 @@ bool tryMergeJSPrivateIdentifier(); bool tryMergeCSharpStringLiteral(); bool tryMergeCSharpKeywordVariables(); - bool tryMergeCSharpDoubleQuestion(); + bool tryMergeCSharpNullCoalescing(); + bool tryMergeCSharpNullCoalescingAssignment(); bool tryMergeCSharpNullConditional(); bool tryTransformCSharpForEach(); bool tryMergeForEach(); diff --git a/clang/lib/Format/FormatTokenLexer.cpp 
b/clang/lib/Format/FormatTokenLexer.cpp --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -97,7 +97,9 @@ return; if (tryMergeCSharpStringLiteral()) return; - if (tryMergeCSharpDoubleQuestion()) + if (tryMergeCSharpNullCoalescing()) + return; + if (tryMergeCSharpNullCoalescingAssignment()) return; if (tryMergeCSharpNullConditional()) return; @@ -310,7 +312,7 @@ "param", "property", "return", "type", }; -bool FormatTokenLexer::tryMergeCSharpDoubleQuestion() { +bool FormatTokenLexer::tryMergeCSharpNullCoalescing() { if (Tokens.size() < 2) return false; auto &FirstQuestion = *(Tokens.end() - 2); @@ -327,6 +329,24 @@ return true; } +bool FormatTokenLexer::tryMergeCSharpNullCoalescingAssignment() { + if (Tokens.size() < 2) + return false; + auto &NullCoalescing = *(Tokens.end() - 2); + auto &Equal = *(Tokens.end() - 1); + if (NullCoalescing->getType() != TT_CSharpNullCoalescing || + !Equal->is(tok::equal)) + return false; + NullCoalescing->Tok.setKind(tok::equal); // no '??=' in clang tokens. + NullCoalescing->TokenText = + StringRef(NullCoalescing->TokenText.begin(), + Equal->TokenText.end() - NullCoalescing->TokenText.begin()); + NullCoalescing->ColumnWidth += Equal->ColumnWidth; + NullCoalescing->setType(TT_CSharpNullCoalescingAssignment); + Tokens.erase(Tokens.end() - 1); + return true; +} + // Merge '?[' and '?.' pairs into single tokens. 
bool FormatTokenLexer::tryMergeCSharpNullConditional() { if (Tokens.size() < 2) diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -1579,24 +1579,29 @@ } } - if (Style.Language == FormatStyle::LK_JavaScript) { - if (Current.is(tok::exclaim)) { - if (Current.Previous && - (Keywords.IsJavaScriptIdentifier( - *Current.Previous, /* AcceptIdentifierName= */ true) || - Current.Previous->isOneOf( - tok::kw_namespace, tok::r_paren, tok::r_square, tok::r_brace, - Keywords.kw_type, Keywords.kw_get, Keywords.kw_set) || - Current.Previous->Tok.isLiteral())) { - Current.setType(TT_JsNonNullAssertion); - return; - } - if (Current.Next && - Current.Next->isOneOf(TT_BinaryOperator, Keywords.kw_as)) { + if ((Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) && + Current.is(tok::exclaim)) { + if (Current.Previous) { + bool IsIdentifier = + Style.Language == FormatStyle::LK_JavaScript + ? Keywords.IsJavaScriptIdentifier( + *Current.Previous, /* AcceptIdentifierName= */ true) + : Current.Previous->is(tok::identifier); + if (IsIdentifier || + Current.Previous->isOneOf( + tok::kw_namespace, tok::r_paren, tok::r_square, tok::r_brace, + tok::kw_false, tok::kw_true, Keywords.kw_type, Keywords.kw_get, + Keywords.kw_set) || + Current.Previous->Tok.isLiteral()) { Current.setType(TT_JsNonNullAssertion); return; } } + if (Current.Next && + Current.Next->isOneOf(TT_BinaryOperator, Keywords.kw_as)) { + Current.setType(TT_JsNonNullAssertion); + return; + } } // Line.MightBeFunctionDecl can only be true after the parentheses of a @@ -3186,6 +3191,10 @@ if (Left.is(TT_CSharpNullCoalescing) || Right.is(TT_CSharpNullCoalescing)) return true; + // No space before null forgiving '!'. + if (Right.is(TT_JsNonNullAssertion)) + return false; + // No space before '?['. 
if (Right.is(TT_CSharpNullConditionalLSquare)) return false; diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1944,6 +1944,11 @@ else if (Args.hasArg(options::OPT_fno_finite_loops)) Opts.FiniteLoops = CodeGenOptions::FiniteLoopsKind::Never; + Opts.EmitIEEENaNCompliantInsts = + Args.hasFlag(options::OPT_mamdgpu_ieee, options::OPT_mno_amdgpu_ieee); + if (!Opts.EmitIEEENaNCompliantInsts && !LangOptsRef.NoHonorNaNs) + Diags.Report(diag::err_drv_amdgpu_ieee_without_no_honor_nans); + return Diags.getNumErrors() == NumErrorsBefore; } diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h --- a/clang/lib/Headers/altivec.h +++ b/clang/lib/Headers/altivec.h @@ -9091,6 +9091,11 @@ return vec_sld(__a, __b, ((__c << 2) & 0x0F)); } +static __inline__ vector float __ATTRS_o_ai vec_sldw( + vector float __a, vector float __b, unsigned const int __c) { + return vec_sld(__a, __b, ((__c << 2) & 0x0F)); +} + #ifdef __VSX__ static __inline__ vector signed long long __ATTRS_o_ai vec_sldw(vector signed long long __a, vector signed long long __b, @@ -9103,6 +9108,11 @@ unsigned const int __c) { return vec_sld(__a, __b, ((__c << 2) & 0x0F)); } + +static __inline__ vector double __ATTRS_o_ai vec_sldw( + vector double __a, vector double __b, unsigned const int __c) { + return vec_sld(__a, __b, ((__c << 2) & 0x0F)); +} #endif #ifdef __POWER9_VECTOR__ diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vcompress.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vcompress.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vcompress.c @@ -0,0 +1,485 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ 
+// RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: @test_vcompress_vm_i8mf8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv1i8.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8:[0-9]+]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint8mf8_t test_vcompress_vm_i8mf8 (vbool64_t mask, vint8mf8_t dest, vint8mf8_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i8mf4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv2i8.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint8mf4_t test_vcompress_vm_i8mf4 (vbool32_t mask, vint8mf4_t dest, vint8mf4_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i8mf2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv4i8.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint8mf2_t test_vcompress_vm_i8mf2 (vbool16_t mask, vint8mf2_t dest, vint8mf2_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i8m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv8i8.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint8m1_t test_vcompress_vm_i8m1 (vbool8_t mask, vint8m1_t dest, vint8m1_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i8m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv16i8.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint8m2_t 
test_vcompress_vm_i8m2 (vbool4_t mask, vint8m2_t dest, vint8m2_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i8m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv32i8.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint8m4_t test_vcompress_vm_i8m4 (vbool2_t mask, vint8m4_t dest, vint8m4_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i8m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv64i8.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint8m8_t test_vcompress_vm_i8m8 (vbool1_t mask, vint8m8_t dest, vint8m8_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i16mf4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv1i16.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint16mf4_t test_vcompress_vm_i16mf4 (vbool64_t mask, vint16mf4_t dest, vint16mf4_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i16mf2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv2i16.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint16mf2_t test_vcompress_vm_i16mf2 (vbool32_t mask, vint16mf2_t dest, vint16mf2_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i16m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv4i16.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// 
+vint16m1_t test_vcompress_vm_i16m1 (vbool16_t mask, vint16m1_t dest, vint16m1_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i16m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv8i16.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint16m2_t test_vcompress_vm_i16m2 (vbool8_t mask, vint16m2_t dest, vint16m2_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i16m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv16i16.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint16m4_t test_vcompress_vm_i16m4 (vbool4_t mask, vint16m4_t dest, vint16m4_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i16m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv32i16.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint16m8_t test_vcompress_vm_i16m8 (vbool2_t mask, vint16m8_t dest, vint16m8_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i32mf2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv1i32.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint32mf2_t test_vcompress_vm_i32mf2 (vbool64_t mask, vint32mf2_t dest, vint32mf2_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i32m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv2i32.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// 
CHECK-RV64-NEXT: ret [[TMP0]] +// +vint32m1_t test_vcompress_vm_i32m1 (vbool32_t mask, vint32m1_t dest, vint32m1_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i32m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv4i32.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint32m2_t test_vcompress_vm_i32m2 (vbool16_t mask, vint32m2_t dest, vint32m2_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i32m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv8i32.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint32m4_t test_vcompress_vm_i32m4 (vbool8_t mask, vint32m4_t dest, vint32m4_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i32m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv16i32.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint32m8_t test_vcompress_vm_i32m8 (vbool4_t mask, vint32m8_t dest, vint32m8_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i64m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv1i64.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m1_t test_vcompress_vm_i64m1 (vbool64_t mask, vint64m1_t dest, vint64m1_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i64m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv2i64.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) 
#[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m2_t test_vcompress_vm_i64m2 (vbool32_t mask, vint64m2_t dest, vint64m2_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i64m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv4i64.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m4_t test_vcompress_vm_i64m4 (vbool16_t mask, vint64m4_t dest, vint64m4_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i64m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv8i64.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m8_t test_vcompress_vm_i64m8 (vbool8_t mask, vint64m8_t dest, vint64m8_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u8mf8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv1i8.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint8mf8_t test_vcompress_vm_u8mf8 (vbool64_t mask, vuint8mf8_t dest, vuint8mf8_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u8mf4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv2i8.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint8mf4_t test_vcompress_vm_u8mf4 (vbool32_t mask, vuint8mf4_t dest, vuint8mf4_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u8mf2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv4i8.i64( [[DEST:%.*]], [[SRC:%.*]], 
[[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint8mf2_t test_vcompress_vm_u8mf2 (vbool16_t mask, vuint8mf2_t dest, vuint8mf2_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u8m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv8i8.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint8m1_t test_vcompress_vm_u8m1 (vbool8_t mask, vuint8m1_t dest, vuint8m1_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u8m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv16i8.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint8m2_t test_vcompress_vm_u8m2 (vbool4_t mask, vuint8m2_t dest, vuint8m2_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u8m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv32i8.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint8m4_t test_vcompress_vm_u8m4 (vbool2_t mask, vuint8m4_t dest, vuint8m4_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u8m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv64i8.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint8m8_t test_vcompress_vm_u8m8 (vbool1_t mask, vuint8m8_t dest, vuint8m8_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u16mf4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv1i16.i64( [[DEST:%.*]], 
[[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint16mf4_t test_vcompress_vm_u16mf4 (vbool64_t mask, vuint16mf4_t dest, vuint16mf4_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u16mf2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv2i16.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint16mf2_t test_vcompress_vm_u16mf2 (vbool32_t mask, vuint16mf2_t dest, vuint16mf2_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u16m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv4i16.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint16m1_t test_vcompress_vm_u16m1 (vbool16_t mask, vuint16m1_t dest, vuint16m1_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u16m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv8i16.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint16m2_t test_vcompress_vm_u16m2 (vbool8_t mask, vuint16m2_t dest, vuint16m2_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u16m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv16i16.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint16m4_t test_vcompress_vm_u16m4 (vbool4_t mask, vuint16m4_t dest, vuint16m4_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u16m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
@llvm.riscv.vcompress.nxv32i16.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint16m8_t test_vcompress_vm_u16m8 (vbool2_t mask, vuint16m8_t dest, vuint16m8_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u32mf2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv1i32.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint32mf2_t test_vcompress_vm_u32mf2 (vbool64_t mask, vuint32mf2_t dest, vuint32mf2_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u32m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv2i32.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint32m1_t test_vcompress_vm_u32m1 (vbool32_t mask, vuint32m1_t dest, vuint32m1_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u32m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv4i32.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint32m2_t test_vcompress_vm_u32m2 (vbool16_t mask, vuint32m2_t dest, vuint32m2_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u32m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv8i32.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint32m4_t test_vcompress_vm_u32m4 (vbool8_t mask, vuint32m4_t dest, vuint32m4_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u32m8( +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv16i32.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint32m8_t test_vcompress_vm_u32m8 (vbool4_t mask, vuint32m8_t dest, vuint32m8_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u64m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv1i64.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m1_t test_vcompress_vm_u64m1 (vbool64_t mask, vuint64m1_t dest, vuint64m1_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u64m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv2i64.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m2_t test_vcompress_vm_u64m2 (vbool32_t mask, vuint64m2_t dest, vuint64m2_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u64m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv4i64.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m4_t test_vcompress_vm_u64m4 (vbool16_t mask, vuint64m4_t dest, vuint64m4_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u64m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv8i64.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m8_t test_vcompress_vm_u64m8 (vbool8_t mask, vuint64m8_t dest, vuint64m8_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_f32mf2( 
+// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv1f32.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat32mf2_t test_vcompress_vm_f32mf2 (vbool64_t mask, vfloat32mf2_t dest, vfloat32mf2_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_f32m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv2f32.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat32m1_t test_vcompress_vm_f32m1 (vbool32_t mask, vfloat32m1_t dest, vfloat32m1_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_f32m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv4f32.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat32m2_t test_vcompress_vm_f32m2 (vbool16_t mask, vfloat32m2_t dest, vfloat32m2_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_f32m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv8f32.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat32m4_t test_vcompress_vm_f32m4 (vbool8_t mask, vfloat32m4_t dest, vfloat32m4_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_f32m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv16f32.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat32m8_t test_vcompress_vm_f32m8 (vbool4_t mask, vfloat32m8_t dest, vfloat32m8_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + 
+// CHECK-RV64-LABEL: @test_vcompress_vm_f64m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv1f64.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat64m1_t test_vcompress_vm_f64m1 (vbool64_t mask, vfloat64m1_t dest, vfloat64m1_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_f64m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv2f64.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat64m2_t test_vcompress_vm_f64m2 (vbool32_t mask, vfloat64m2_t dest, vfloat64m2_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_f64m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv4f64.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat64m4_t test_vcompress_vm_f64m4 (vbool16_t mask, vfloat64m4_t dest, vfloat64m4_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_f64m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv8f64.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) #[[ATTR8]] +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat64m8_t test_vcompress_vm_f64m8 (vbool8_t mask, vfloat64m8_t dest, vfloat64m8_t src, size_t vl) { + return vcompress(mask, dest, src, vl); +} + + diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vcompress.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vcompress.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vcompress.c @@ -0,0 +1,483 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: riscv-registered-target +// RUN: 
%clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: @test_vcompress_vm_i8mf8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv1i8.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint8mf8_t test_vcompress_vm_i8mf8 (vbool64_t mask, vint8mf8_t dest, vint8mf8_t src, size_t vl) { + return vcompress_vm_i8mf8(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i8mf4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv2i8.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint8mf4_t test_vcompress_vm_i8mf4 (vbool32_t mask, vint8mf4_t dest, vint8mf4_t src, size_t vl) { + return vcompress_vm_i8mf4(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i8mf2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv4i8.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint8mf2_t test_vcompress_vm_i8mf2 (vbool16_t mask, vint8mf2_t dest, vint8mf2_t src, size_t vl) { + return vcompress_vm_i8mf2(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i8m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv8i8.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint8m1_t test_vcompress_vm_i8m1 (vbool8_t mask, vint8m1_t dest, vint8m1_t src, size_t vl) { + return vcompress_vm_i8m1(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i8m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv16i8.i64( [[DEST:%.*]], [[SRC:%.*]], 
[[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint8m2_t test_vcompress_vm_i8m2 (vbool4_t mask, vint8m2_t dest, vint8m2_t src, size_t vl) { + return vcompress_vm_i8m2(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i8m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv32i8.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint8m4_t test_vcompress_vm_i8m4 (vbool2_t mask, vint8m4_t dest, vint8m4_t src, size_t vl) { + return vcompress_vm_i8m4(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i8m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv64i8.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint8m8_t test_vcompress_vm_i8m8 (vbool1_t mask, vint8m8_t dest, vint8m8_t src, size_t vl) { + return vcompress_vm_i8m8(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i16mf4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv1i16.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint16mf4_t test_vcompress_vm_i16mf4 (vbool64_t mask, vint16mf4_t dest, vint16mf4_t src, size_t vl) { + return vcompress_vm_i16mf4(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i16mf2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv2i16.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint16mf2_t test_vcompress_vm_i16mf2 (vbool32_t mask, vint16mf2_t dest, vint16mf2_t src, size_t vl) { + return vcompress_vm_i16mf2(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i16m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv4i16.i64( [[DEST:%.*]], [[SRC:%.*]], 
[[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint16m1_t test_vcompress_vm_i16m1 (vbool16_t mask, vint16m1_t dest, vint16m1_t src, size_t vl) { + return vcompress_vm_i16m1(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i16m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv8i16.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint16m2_t test_vcompress_vm_i16m2 (vbool8_t mask, vint16m2_t dest, vint16m2_t src, size_t vl) { + return vcompress_vm_i16m2(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i16m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv16i16.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint16m4_t test_vcompress_vm_i16m4 (vbool4_t mask, vint16m4_t dest, vint16m4_t src, size_t vl) { + return vcompress_vm_i16m4(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i16m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv32i16.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint16m8_t test_vcompress_vm_i16m8 (vbool2_t mask, vint16m8_t dest, vint16m8_t src, size_t vl) { + return vcompress_vm_i16m8(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i32mf2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv1i32.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint32mf2_t test_vcompress_vm_i32mf2 (vbool64_t mask, vint32mf2_t dest, vint32mf2_t src, size_t vl) { + return vcompress_vm_i32mf2(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i32m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv2i32.i64( [[DEST:%.*]], 
[[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint32m1_t test_vcompress_vm_i32m1 (vbool32_t mask, vint32m1_t dest, vint32m1_t src, size_t vl) { + return vcompress_vm_i32m1(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i32m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv4i32.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint32m2_t test_vcompress_vm_i32m2 (vbool16_t mask, vint32m2_t dest, vint32m2_t src, size_t vl) { + return vcompress_vm_i32m2(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i32m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv8i32.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint32m4_t test_vcompress_vm_i32m4 (vbool8_t mask, vint32m4_t dest, vint32m4_t src, size_t vl) { + return vcompress_vm_i32m4(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i32m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv16i32.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint32m8_t test_vcompress_vm_i32m8 (vbool4_t mask, vint32m8_t dest, vint32m8_t src, size_t vl) { + return vcompress_vm_i32m8(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i64m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv1i64.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m1_t test_vcompress_vm_i64m1 (vbool64_t mask, vint64m1_t dest, vint64m1_t src, size_t vl) { + return vcompress_vm_i64m1(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i64m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv2i64.i64( 
[[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m2_t test_vcompress_vm_i64m2 (vbool32_t mask, vint64m2_t dest, vint64m2_t src, size_t vl) { + return vcompress_vm_i64m2(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i64m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv4i64.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m4_t test_vcompress_vm_i64m4 (vbool16_t mask, vint64m4_t dest, vint64m4_t src, size_t vl) { + return vcompress_vm_i64m4(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_i64m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv8i64.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m8_t test_vcompress_vm_i64m8 (vbool8_t mask, vint64m8_t dest, vint64m8_t src, size_t vl) { + return vcompress_vm_i64m8(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u8mf8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv1i8.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint8mf8_t test_vcompress_vm_u8mf8 (vbool64_t mask, vuint8mf8_t dest, vuint8mf8_t src, size_t vl) { + return vcompress_vm_u8mf8(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u8mf4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv2i8.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint8mf4_t test_vcompress_vm_u8mf4 (vbool32_t mask, vuint8mf4_t dest, vuint8mf4_t src, size_t vl) { + return vcompress_vm_u8mf4(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u8mf2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
@llvm.riscv.vcompress.nxv4i8.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint8mf2_t test_vcompress_vm_u8mf2 (vbool16_t mask, vuint8mf2_t dest, vuint8mf2_t src, size_t vl) { + return vcompress_vm_u8mf2(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u8m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv8i8.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint8m1_t test_vcompress_vm_u8m1 (vbool8_t mask, vuint8m1_t dest, vuint8m1_t src, size_t vl) { + return vcompress_vm_u8m1(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u8m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv16i8.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint8m2_t test_vcompress_vm_u8m2 (vbool4_t mask, vuint8m2_t dest, vuint8m2_t src, size_t vl) { + return vcompress_vm_u8m2(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u8m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv32i8.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint8m4_t test_vcompress_vm_u8m4 (vbool2_t mask, vuint8m4_t dest, vuint8m4_t src, size_t vl) { + return vcompress_vm_u8m4(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u8m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv64i8.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint8m8_t test_vcompress_vm_u8m8 (vbool1_t mask, vuint8m8_t dest, vuint8m8_t src, size_t vl) { + return vcompress_vm_u8m8(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u16mf4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
@llvm.riscv.vcompress.nxv1i16.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint16mf4_t test_vcompress_vm_u16mf4 (vbool64_t mask, vuint16mf4_t dest, vuint16mf4_t src, size_t vl) { + return vcompress_vm_u16mf4(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u16mf2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv2i16.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint16mf2_t test_vcompress_vm_u16mf2 (vbool32_t mask, vuint16mf2_t dest, vuint16mf2_t src, size_t vl) { + return vcompress_vm_u16mf2(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u16m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv4i16.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint16m1_t test_vcompress_vm_u16m1 (vbool16_t mask, vuint16m1_t dest, vuint16m1_t src, size_t vl) { + return vcompress_vm_u16m1(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u16m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv8i16.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint16m2_t test_vcompress_vm_u16m2 (vbool8_t mask, vuint16m2_t dest, vuint16m2_t src, size_t vl) { + return vcompress_vm_u16m2(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u16m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv16i16.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint16m4_t test_vcompress_vm_u16m4 (vbool4_t mask, vuint16m4_t dest, vuint16m4_t src, size_t vl) { + return vcompress_vm_u16m4(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u16m8( +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv32i16.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint16m8_t test_vcompress_vm_u16m8 (vbool2_t mask, vuint16m8_t dest, vuint16m8_t src, size_t vl) { + return vcompress_vm_u16m8(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u32mf2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv1i32.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint32mf2_t test_vcompress_vm_u32mf2 (vbool64_t mask, vuint32mf2_t dest, vuint32mf2_t src, size_t vl) { + return vcompress_vm_u32mf2(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u32m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv2i32.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint32m1_t test_vcompress_vm_u32m1 (vbool32_t mask, vuint32m1_t dest, vuint32m1_t src, size_t vl) { + return vcompress_vm_u32m1(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u32m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv4i32.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint32m2_t test_vcompress_vm_u32m2 (vbool16_t mask, vuint32m2_t dest, vuint32m2_t src, size_t vl) { + return vcompress_vm_u32m2(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u32m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv8i32.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint32m4_t test_vcompress_vm_u32m4 (vbool8_t mask, vuint32m4_t dest, vuint32m4_t src, size_t vl) { + return vcompress_vm_u32m4(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u32m8( +// 
CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv16i32.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint32m8_t test_vcompress_vm_u32m8 (vbool4_t mask, vuint32m8_t dest, vuint32m8_t src, size_t vl) { + return vcompress_vm_u32m8(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u64m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv1i64.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m1_t test_vcompress_vm_u64m1 (vbool64_t mask, vuint64m1_t dest, vuint64m1_t src, size_t vl) { + return vcompress_vm_u64m1(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u64m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv2i64.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m2_t test_vcompress_vm_u64m2 (vbool32_t mask, vuint64m2_t dest, vuint64m2_t src, size_t vl) { + return vcompress_vm_u64m2(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u64m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv4i64.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m4_t test_vcompress_vm_u64m4 (vbool16_t mask, vuint64m4_t dest, vuint64m4_t src, size_t vl) { + return vcompress_vm_u64m4(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_u64m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv8i64.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m8_t test_vcompress_vm_u64m8 (vbool8_t mask, vuint64m8_t dest, vuint64m8_t src, size_t vl) { + return vcompress_vm_u64m8(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: 
@test_vcompress_vm_f32mf2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv1f32.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat32mf2_t test_vcompress_vm_f32mf2 (vbool64_t mask, vfloat32mf2_t dest, vfloat32mf2_t src, size_t vl) { + return vcompress_vm_f32mf2(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_f32m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv2f32.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat32m1_t test_vcompress_vm_f32m1 (vbool32_t mask, vfloat32m1_t dest, vfloat32m1_t src, size_t vl) { + return vcompress_vm_f32m1(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_f32m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv4f32.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat32m2_t test_vcompress_vm_f32m2 (vbool16_t mask, vfloat32m2_t dest, vfloat32m2_t src, size_t vl) { + return vcompress_vm_f32m2(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_f32m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv8f32.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat32m4_t test_vcompress_vm_f32m4 (vbool8_t mask, vfloat32m4_t dest, vfloat32m4_t src, size_t vl) { + return vcompress_vm_f32m4(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_f32m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv16f32.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat32m8_t test_vcompress_vm_f32m8 (vbool4_t mask, vfloat32m8_t dest, vfloat32m8_t src, size_t vl) { + return vcompress_vm_f32m8(mask, 
dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_f64m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv1f64.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat64m1_t test_vcompress_vm_f64m1 (vbool64_t mask, vfloat64m1_t dest, vfloat64m1_t src, size_t vl) { + return vcompress_vm_f64m1(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_f64m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv2f64.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat64m2_t test_vcompress_vm_f64m2 (vbool32_t mask, vfloat64m2_t dest, vfloat64m2_t src, size_t vl) { + return vcompress_vm_f64m2(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_f64m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv4f64.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat64m4_t test_vcompress_vm_f64m4 (vbool16_t mask, vfloat64m4_t dest, vfloat64m4_t src, size_t vl) { + return vcompress_vm_f64m4(mask, dest, src, vl); +} + +// CHECK-RV64-LABEL: @test_vcompress_vm_f64m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vcompress.nxv8f64.i64( [[DEST:%.*]], [[SRC:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat64m8_t test_vcompress_vm_f64m8 (vbool8_t mask, vfloat64m8_t dest, vfloat64m8_t src, size_t vl) { + return vcompress_vm_f64m8(mask, dest, src, vl); +} diff --git a/clang/test/CodeGen/builtins-ppc-altivec.c b/clang/test/CodeGen/builtins-ppc-altivec.c --- a/clang/test/CodeGen/builtins-ppc-altivec.c +++ b/clang/test/CodeGen/builtins-ppc-altivec.c @@ -3759,6 +3759,18 @@ // CHECK-LE: sub nsw i32 31 // CHECK-LE: @llvm.ppc.altivec.vperm + res_vf = vec_sldw(vf, vf, 0); + // CHECK: add nsw i32 
{{[0-9a-zA-Z%.]+}}, 1 + // CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 2 + // CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 3 + // CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 15 + // CHECK: @llvm.ppc.altivec.vperm + // CHECK-LE: sub nsw i32 16 + // CHECK-LE: sub nsw i32 17 + // CHECK-LE: sub nsw i32 18 + // CHECK-LE: sub nsw i32 31 + // CHECK-LE: @llvm.ppc.altivec.vperm + res_vsc = vec_vsldoi(vsc, vsc, 0); // CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 1 // CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 2 diff --git a/clang/test/CodeGen/builtins-ppc-vsx.c b/clang/test/CodeGen/builtins-ppc-vsx.c --- a/clang/test/CodeGen/builtins-ppc-vsx.c +++ b/clang/test/CodeGen/builtins-ppc-vsx.c @@ -1888,6 +1888,18 @@ // CHECK-LE: sub nsw i32 17 // CHECK-LE: sub nsw i32 18 // CHECK-LE: sub nsw i32 31 +// CHECK-LE: @llvm.ppc.altivec.vperm + + res_vd = vec_sldw(vd, vd, 0); +// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 1 +// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 2 +// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 3 +// CHECK: add nsw i32 {{[0-9a-zA-Z%.]+}}, 15 +// CHECK: @llvm.ppc.altivec.vperm +// CHECK-LE: sub nsw i32 16 +// CHECK-LE: sub nsw i32 17 +// CHECK-LE: sub nsw i32 18 +// CHECK-LE: sub nsw i32 31 // CHECK-LE: @llvm.ppc.altivec.vperm res_vsll = vec_sll(vsll, vuc); diff --git a/clang/test/CodeGenCXX/debug-info-codeview-display-name.cpp b/clang/test/CodeGenCXX/debug-info-codeview-display-name.cpp --- a/clang/test/CodeGenCXX/debug-info-codeview-display-name.cpp +++ b/clang/test/CodeGenCXX/debug-info-codeview-display-name.cpp @@ -86,6 +86,12 @@ template void fn_tmpl(); // CHECK-DAG: "fn_tmpl" +template +void fn_tmpl_typecheck() {} + +template void fn_tmpl_typecheck(); +// CHECK-DAG: "fn_tmpl_typecheck" + template struct ClassTemplate { A a; B b; C c; }; ClassTemplate > f; // This will only show up in normal debug builds. 
The space in `> >` is diff --git a/clang/test/CodeGenOpenCL/amdgpu-ieee.cl b/clang/test/CodeGenOpenCL/amdgpu-ieee.cl new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenOpenCL/amdgpu-ieee.cl @@ -0,0 +1,47 @@ +// REQUIRES: amdgpu-registered-target +// +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=COMMON,ON %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \ +// RUN: -mno-amdgpu-ieee -menable-no-nans \ +// RUN: | FileCheck -check-prefixes=COMMON,OFF %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \ +// RUN: -mno-amdgpu-ieee -cl-fast-relaxed-math \ +// RUN: | FileCheck -check-prefixes=COMMON,OFF %s + +// Check AMDGCN ISA generation. + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O3 -S -o - %s \ +// RUN: | FileCheck -check-prefixes=ISA-ON %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O3 -S -o - %s \ +// RUN: -mno-amdgpu-ieee -menable-no-nans \ +// RUN: | FileCheck -check-prefixes=ISA-OFF %s + +// Check diagnostics when using -mno-amdgpu-ieee without NoHonorNaNs. 
+ +// RUN: not %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \ +// RUN: -mno-amdgpu-ieee 2>&1 | FileCheck -check-prefixes=DIAG %s + +// COMMON: define{{.*}} amdgpu_kernel void @kern{{.*}} [[ATTRS1:#[0-9]+]] +// ISA-ON: v_mul_f32_e64 v{{[0-9]+}}, 1.0, s{{[0-9]+}} +// ISA-ON: v_mul_f32_e64 v{{[0-9]+}}, 1.0, s{{[0-9]+}} +// ISA-ON: v_min_f32_e32 +// ISA-ON: ; IeeeMode: 1 +// ISA-OFF-NOT: v_mul_f32_e64 v{{[0-9]+}}, 1.0, s{{[0-9]+}} +// ISA-OFF-NOT: v_mul_f32_e64 v{{[0-9]+}}, 1.0, s{{[0-9]+}} +// ISA-OFF: v_min_f32_e32 +// ISA-OFF: ; IeeeMode: 0 +kernel void kern(global float *x, float y, float z) { + *x = __builtin_fmin(y, z); +} + +// COMMON: define{{.*}}void @fun() [[ATTRS2:#[0-9]+]] +void fun() { +} + +// ON-NOT: attributes [[ATTRS1]] = {{.*}} "amdgpu-ieee" +// OFF: attributes [[ATTRS1]] = {{.*}} "amdgpu-ieee"="false"{{.*}}"no-nans-fp-math"="true"{{.*}}"no-trapping-math"="true" +// ON-NOT: attributes [[ATTRS2]] = {{.*}} "amdgpu-ieee" +// OFF: attributes [[ATTRS2]] = {{.*}} "amdgpu-ieee"="false"{{.*}}"no-nans-fp-math"="true"{{.*}}"no-trapping-math"="true" + +// DIAG: invalid argument '-mno-amdgpu-ieee' only allowed with relaxed NaN handling diff --git a/clang/test/Driver/darwin-ld-platform-version-macos.c b/clang/test/Driver/darwin-ld-platform-version-macos.c --- a/clang/test/Driver/darwin-ld-platform-version-macos.c +++ b/clang/test/Driver/darwin-ld-platform-version-macos.c @@ -48,4 +48,7 @@ // RUN: %clang -target x86_64-apple-macos10.13 -mlinker-version=520 \ // RUN: -### %t.o 2>&1 \ // RUN: | FileCheck --check-prefix=NOSDK %s -// NOSDK: "-platform_version" "macos" "10.13.0" "0.0.0" +// RUN: %clang -target x86_64-apple-darwin17 -mlinker-version=520 \ +// RUN: -### %t.o 2>&1 \ +// RUN: | FileCheck --check-prefix=NOSDK %s +// NOSDK: "-platform_version" "macos" "10.13.0" "10.13.0" diff --git a/clang/test/SemaTemplate/temp_arg_nontype_cxx11.cpp b/clang/test/SemaTemplate/temp_arg_nontype_cxx11.cpp --- a/clang/test/SemaTemplate/temp_arg_nontype_cxx11.cpp 
+++ b/clang/test/SemaTemplate/temp_arg_nontype_cxx11.cpp @@ -77,3 +77,40 @@ }; void f(C ca) { ca.f({}, 0); } } + +using FourChars = const char[4]; +constexpr FourChars kEta = "Eta"; +constexpr const char kDes[4] = "Des"; +constexpr const char *kNull = "Phi"; +constexpr const char **kZero[] = {}; + +template class Column {}; +template class Dolumn {}; +template class Folumn {}; +template class Golumn {}; +template class Holumn {}; +template class Jolumn {}; +template class Iolumn {}; + +class container { +public: + int a; +}; +template class Kolumn {}; + +void lookup() { + Column().ls(); // expected-error {{().ls(); // expected-error {{().ls(); // expected-error {{().ls(); // expected-error {{().ls(); // expected-error {{().ls(); // expected-error {{<&kEta,}} + Folumn<&kDes, double>().ls(); // expected-error {{<&kDes,}} + Golumn<&kEta, double>().ls(); // expected-error {{<&kEta,}} + Golumn<&kDes, double>().ls(); // expected-error {{<&kDes,}} + Holumn<&kNull, double>().ls(); // expected-error {{<&kNull,}} + Jolumn().ls(); // expected-error {{().ls(); // expected-error {{<&kZero,}} + Kolumn<&container::a>().ls(); // expected-error {{<&container::a}} + Kolumn().ls(); // expected-error {{'s constructor as 'constexpr' isn't sufficient to prevent +// dynamic initialization, as default initialization is fine under 'constexpr' +// (but not 'constinit'). Clang at -O0, and gcc at all opt levels will emit a +// dynamic initializer for any constant-initialized variables if there is a mix +// of default-initialized and constant-initialized variables. +// +// If you're looking at this because your build failed, you probably introduced +// a new member to scudo::Allocator<> (possibly transiently) that didn't have an +// initializer. The fix is easy - just add one. 
+#if defined(__has_attribute) +#if __has_attribute(require_constant_initialization) +#define SCUDO_REQUIRE_CONSTANT_INITIALIZATION \ + __attribute__((__require_constant_initialization__)) +#else +#define SCUDO_REQUIRE_CONSTANT_INITIALIZATION +#endif +#endif + namespace scudo { typedef unsigned long uptr; diff --git a/compiler-rt/lib/scudo/standalone/list.h b/compiler-rt/lib/scudo/standalone/list.h --- a/compiler-rt/lib/scudo/standalone/list.h +++ b/compiler-rt/lib/scudo/standalone/list.h @@ -57,9 +57,9 @@ void checkConsistency() const; protected: - uptr Size; - T *First; - T *Last; + uptr Size = 0; + T *First = nullptr; + T *Last = nullptr; }; template void IntrusiveList::checkConsistency() const { diff --git a/compiler-rt/lib/scudo/standalone/local_cache.h b/compiler-rt/lib/scudo/standalone/local_cache.h --- a/compiler-rt/lib/scudo/standalone/local_cache.h +++ b/compiler-rt/lib/scudo/standalone/local_cache.h @@ -138,9 +138,9 @@ uptr ClassSize; CompactPtrT Chunks[2 * TransferBatch::MaxNumCached]; }; - PerClass PerClassArray[NumClasses]; + PerClass PerClassArray[NumClasses] = {}; LocalStats Stats; - SizeClassAllocator *Allocator; + SizeClassAllocator *Allocator = nullptr; ALWAYS_INLINE void initCacheMaybe(PerClass *C) { if (LIKELY(C->MaxCount)) diff --git a/compiler-rt/lib/scudo/standalone/mutex.h b/compiler-rt/lib/scudo/standalone/mutex.h --- a/compiler-rt/lib/scudo/standalone/mutex.h +++ b/compiler-rt/lib/scudo/standalone/mutex.h @@ -48,9 +48,9 @@ static constexpr u8 NumberOfYields = 8U; #if SCUDO_LINUX - atomic_u32 M; + atomic_u32 M = {}; #elif SCUDO_FUCHSIA - sync_mutex_t M; + sync_mutex_t M = {}; #endif void lockSlow(); diff --git a/compiler-rt/lib/scudo/standalone/options.h b/compiler-rt/lib/scudo/standalone/options.h --- a/compiler-rt/lib/scudo/standalone/options.h +++ b/compiler-rt/lib/scudo/standalone/options.h @@ -44,9 +44,8 @@ } struct AtomicOptions { - atomic_u32 Val; + atomic_u32 Val = {}; -public: Options load() const { return 
Options{atomic_load_relaxed(&Val)}; } void clear(OptionBit Opt) { diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h --- a/compiler-rt/lib/scudo/standalone/primary32.h +++ b/compiler-rt/lib/scudo/standalone/primary32.h @@ -489,17 +489,17 @@ return TotalReleasedBytes; } - SizeClassInfo SizeClassInfoArray[NumClasses]; + SizeClassInfo SizeClassInfoArray[NumClasses] = {}; // Track the regions in use, 0 is unused, otherwise store ClassId + 1. - ByteMap PossibleRegions; - atomic_s32 ReleaseToOsIntervalMs; + ByteMap PossibleRegions = {}; + atomic_s32 ReleaseToOsIntervalMs = {}; // Unless several threads request regions simultaneously from different size // classes, the stash rarely contains more than 1 entry. static constexpr uptr MaxStashedRegions = 4; HybridMutex RegionsStashMutex; - uptr NumberOfStashedRegions; - uptr RegionsStash[MaxStashedRegions]; + uptr NumberOfStashedRegions = 0; + uptr RegionsStash[MaxStashedRegions] = {}; }; } // namespace scudo diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h --- a/compiler-rt/lib/scudo/standalone/primary64.h +++ b/compiler-rt/lib/scudo/standalone/primary64.h @@ -285,24 +285,24 @@ struct UnpaddedRegionInfo { HybridMutex Mutex; SinglyLinkedList FreeList; - uptr RegionBeg; - RegionStats Stats; - u32 RandState; - uptr MappedUser; // Bytes mapped for user memory. - uptr AllocatedUser; // Bytes allocated for user memory. - MapPlatformData Data; - ReleaseToOsInfo ReleaseInfo; - bool Exhausted; + uptr RegionBeg = 0; + RegionStats Stats = {}; + u32 RandState = 0; + uptr MappedUser = 0; // Bytes mapped for user memory. + uptr AllocatedUser = 0; // Bytes allocated for user memory. 
+ MapPlatformData Data = {}; + ReleaseToOsInfo ReleaseInfo = {}; + bool Exhausted = false; }; struct RegionInfo : UnpaddedRegionInfo { char Padding[SCUDO_CACHE_LINE_SIZE - - (sizeof(UnpaddedRegionInfo) % SCUDO_CACHE_LINE_SIZE)]; + (sizeof(UnpaddedRegionInfo) % SCUDO_CACHE_LINE_SIZE)] = {}; }; static_assert(sizeof(RegionInfo) % SCUDO_CACHE_LINE_SIZE == 0, ""); - uptr PrimaryBase; - MapPlatformData Data; - atomic_s32 ReleaseToOsIntervalMs; + uptr PrimaryBase = 0; + MapPlatformData Data = {}; + atomic_s32 ReleaseToOsIntervalMs = {}; alignas(SCUDO_CACHE_LINE_SIZE) RegionInfo RegionInfoArray[NumClasses]; RegionInfo *getRegionInfo(uptr ClassId) { diff --git a/compiler-rt/lib/scudo/standalone/quarantine.h b/compiler-rt/lib/scudo/standalone/quarantine.h --- a/compiler-rt/lib/scudo/standalone/quarantine.h +++ b/compiler-rt/lib/scudo/standalone/quarantine.h @@ -161,7 +161,7 @@ private: SinglyLinkedList List; - atomic_uptr Size; + atomic_uptr Size = {}; void addToSize(uptr add) { atomic_store_relaxed(&Size, getSize() + add); } void subFromSize(uptr sub) { atomic_store_relaxed(&Size, getSize() - sub); } @@ -246,9 +246,9 @@ alignas(SCUDO_CACHE_LINE_SIZE) HybridMutex CacheMutex; CacheT Cache; alignas(SCUDO_CACHE_LINE_SIZE) HybridMutex RecycleMutex; - atomic_uptr MinSize; - atomic_uptr MaxSize; - alignas(SCUDO_CACHE_LINE_SIZE) atomic_uptr MaxCacheSize; + atomic_uptr MinSize = {}; + atomic_uptr MaxSize = {}; + alignas(SCUDO_CACHE_LINE_SIZE) atomic_uptr MaxCacheSize = {}; void NOINLINE recycle(uptr MinSize, Callback Cb) { CacheT Tmp; diff --git a/compiler-rt/lib/scudo/standalone/secondary.h b/compiler-rt/lib/scudo/standalone/secondary.h --- a/compiler-rt/lib/scudo/standalone/secondary.h +++ b/compiler-rt/lib/scudo/standalone/secondary.h @@ -9,9 +9,12 @@ #ifndef SCUDO_SECONDARY_H_ #define SCUDO_SECONDARY_H_ +#include "chunk.h" #include "common.h" #include "list.h" +#include "memtag.h" #include "mutex.h" +#include "options.h" #include "stats.h" #include "string_utils.h" @@ -279,13 
+282,15 @@ Config::SecondaryCacheMinReleaseToOsIntervalMs); atomic_store_relaxed(&ReleaseToOsIntervalMs, Interval); return true; - } else if (O == Option::MaxCacheEntriesCount) { + } + if (O == Option::MaxCacheEntriesCount) { const u32 MaxCount = static_cast(Value); if (MaxCount > Config::SecondaryCacheEntriesArraySize) return false; atomic_store_relaxed(&MaxEntriesCount, MaxCount); return true; - } else if (O == Option::MaxCacheEntrySize) { + } + if (O == Option::MaxCacheEntrySize) { atomic_store_relaxed(&MaxEntrySize, static_cast(Value)); return true; } @@ -377,16 +382,16 @@ } HybridMutex Mutex; - u32 EntriesCount; - u32 QuarantinePos; - atomic_u32 MaxEntriesCount; - atomic_uptr MaxEntrySize; - u64 OldestTime; - u32 IsFullEvents; - atomic_s32 ReleaseToOsIntervalMs; - - CachedBlock Entries[Config::SecondaryCacheEntriesArraySize]; - CachedBlock Quarantine[Config::SecondaryCacheQuarantineSize]; + u32 EntriesCount = 0; + u32 QuarantinePos = 0; + atomic_u32 MaxEntriesCount = {}; + atomic_uptr MaxEntrySize = {}; + u64 OldestTime = 0; + u32 IsFullEvents = 0; + atomic_s32 ReleaseToOsIntervalMs = {}; + + CachedBlock Entries[Config::SecondaryCacheEntriesArraySize] = {}; + CachedBlock Quarantine[Config::SecondaryCacheQuarantineSize] = {}; }; template class MapAllocator { @@ -451,11 +456,11 @@ HybridMutex Mutex; DoublyLinkedList InUseBlocks; - uptr AllocatedBytes; - uptr FreedBytes; - uptr LargestSize; - u32 NumberOfAllocs; - u32 NumberOfFrees; + uptr AllocatedBytes = 0; + uptr FreedBytes = 0; + uptr LargestSize = 0; + u32 NumberOfAllocs = 0; + u32 NumberOfFrees = 0; LocalStats Stats; }; diff --git a/compiler-rt/lib/scudo/standalone/stack_depot.h b/compiler-rt/lib/scudo/standalone/stack_depot.h --- a/compiler-rt/lib/scudo/standalone/stack_depot.h +++ b/compiler-rt/lib/scudo/standalone/stack_depot.h @@ -40,7 +40,7 @@ class StackDepot { HybridMutex RingEndMu; - u32 RingEnd; + u32 RingEnd = 0; // This data structure stores a stack trace for each allocation and // deallocation 
when stack trace recording is enabled, that may be looked up @@ -70,7 +70,7 @@ #endif static const uptr TabSize = 1 << TabBits; static const uptr TabMask = TabSize - 1; - atomic_u32 Tab[TabSize]; + atomic_u32 Tab[TabSize] = {}; #ifdef SCUDO_FUZZ static const uptr RingBits = 4; @@ -79,7 +79,7 @@ #endif static const uptr RingSize = 1 << RingBits; static const uptr RingMask = RingSize - 1; - atomic_u64 Ring[RingSize]; + atomic_u64 Ring[RingSize] = {}; public: // Insert hash of the stack trace [Begin, End) into the stack depot, and diff --git a/compiler-rt/lib/scudo/standalone/stats.h b/compiler-rt/lib/scudo/standalone/stats.h --- a/compiler-rt/lib/scudo/standalone/stats.h +++ b/compiler-rt/lib/scudo/standalone/stats.h @@ -46,11 +46,11 @@ uptr get(StatType I) const { return atomic_load_relaxed(&StatsArray[I]); } - LocalStats *Next; - LocalStats *Prev; + LocalStats *Next = nullptr; + LocalStats *Prev = nullptr; private: - atomic_uptr StatsArray[StatCount]; + atomic_uptr StatsArray[StatCount] = {}; }; // Global stats, used for aggregation and querying. 
diff --git a/compiler-rt/lib/scudo/standalone/tsd.h b/compiler-rt/lib/scudo/standalone/tsd.h --- a/compiler-rt/lib/scudo/standalone/tsd.h +++ b/compiler-rt/lib/scudo/standalone/tsd.h @@ -26,7 +26,7 @@ template struct alignas(SCUDO_CACHE_LINE_SIZE) TSD { typename Allocator::CacheT Cache; typename Allocator::QuarantineCacheT QuarantineCache; - u8 DestructorIterations; + u8 DestructorIterations = 0; void initLinkerInitialized(Allocator *Instance) { Instance->initCache(&Cache); @@ -59,7 +59,7 @@ private: HybridMutex Mutex; - atomic_uptr Precedence; + atomic_uptr Precedence = {}; }; } // namespace scudo diff --git a/compiler-rt/lib/scudo/standalone/tsd_exclusive.h b/compiler-rt/lib/scudo/standalone/tsd_exclusive.h --- a/compiler-rt/lib/scudo/standalone/tsd_exclusive.h +++ b/compiler-rt/lib/scudo/standalone/tsd_exclusive.h @@ -108,9 +108,9 @@ Instance->callPostInitCallback(); } - pthread_key_t PThreadKey; - bool Initialized; - atomic_u8 Disabled; + pthread_key_t PThreadKey = {}; + bool Initialized = false; + atomic_u8 Disabled = {}; TSD FallbackTSD; HybridMutex Mutex; static thread_local ThreadState State; diff --git a/compiler-rt/lib/scudo/standalone/wrappers_c.cpp b/compiler-rt/lib/scudo/standalone/wrappers_c.cpp --- a/compiler-rt/lib/scudo/standalone/wrappers_c.cpp +++ b/compiler-rt/lib/scudo/standalone/wrappers_c.cpp @@ -26,6 +26,7 @@ // Export the static allocator so that the C++ wrappers can access it. // Technically we could have a completely separated heap for C & C++ but in // reality the amount of cross pollination between the two is staggering. 
+SCUDO_REQUIRE_CONSTANT_INITIALIZATION scudo::Allocator SCUDO_ALLOCATOR; #include "wrappers_c.inc" diff --git a/compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp b/compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp --- a/compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp +++ b/compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp @@ -23,6 +23,7 @@ #define SCUDO_ALLOCATOR Allocator extern "C" void SCUDO_PREFIX(malloc_postinit)(); +SCUDO_REQUIRE_CONSTANT_INITIALIZATION static scudo::Allocator SCUDO_ALLOCATOR; @@ -36,6 +37,7 @@ #define SCUDO_ALLOCATOR SvelteAllocator extern "C" void SCUDO_PREFIX(malloc_postinit)(); +SCUDO_REQUIRE_CONSTANT_INITIALIZATION static scudo::Allocator SCUDO_ALLOCATOR; diff --git a/libcxx/include/__config b/libcxx/include/__config --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -106,6 +106,8 @@ # define _LIBCPP_ABI_OPTIMIZED_FUNCTION // All the regex constants must be distinct and nonzero. # define _LIBCPP_ABI_REGEX_CONSTANTS_NONZERO +// Use raw pointers, not wrapped ones, for std::span's iterator type. +# define _LIBCPP_ABI_SPAN_POINTER_ITERATORS // Re-worked external template instantiations for std::string with a focus on // performance and fast-path inlining. 
# define _LIBCPP_ABI_STRING_OPTIMIZED_EXTERNAL_INSTANTIATION @@ -848,7 +850,7 @@ #define _LIBCPP_HAS_NO_CONCEPTS #endif -#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_HAS_NO_CONCEPTS) || defined(_MSC_VER) +#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_HAS_NO_CONCEPTS) #define _LIBCPP_HAS_NO_RANGES #endif diff --git a/libcxx/include/__iterator/concepts.h b/libcxx/include/__iterator/concepts.h --- a/libcxx/include/__iterator/concepts.h +++ b/libcxx/include/__iterator/concepts.h @@ -33,12 +33,12 @@ // [iterator.concept.readable] template concept __indirectly_readable_impl = - requires(const _In __in) { + requires(const _In __i) { typename iter_value_t<_In>; typename iter_reference_t<_In>; typename iter_rvalue_reference_t<_In>; - { *__in } -> same_as >; - { ranges::iter_move(__in) } -> same_as >; + { *__i } -> same_as >; + { ranges::iter_move(__i) } -> same_as >; } && common_reference_with&&, iter_value_t<_In>&> && common_reference_with&&, iter_rvalue_reference_t<_In>&&> && diff --git a/libcxx/include/span b/libcxx/include/span --- a/libcxx/include/span +++ b/libcxx/include/span @@ -200,7 +200,11 @@ using const_pointer = const _Tp *; using reference = _Tp &; using const_reference = const _Tp &; - using iterator = __wrap_iter; +#if (_LIBCPP_DEBUG_LEVEL == 2) || defined(_LIBCPP_ABI_SPAN_POINTER_ITERATORS) + using iterator = pointer; +#else + using iterator = __wrap_iter; +#endif using reverse_iterator = _VSTD::reverse_iterator; static constexpr size_type extent = _Extent; @@ -375,7 +379,11 @@ using const_pointer = const _Tp *; using reference = _Tp &; using const_reference = const _Tp &; - using iterator = __wrap_iter; +#if (_LIBCPP_DEBUG_LEVEL == 2) || defined(_LIBCPP_ABI_SPAN_POINTER_ITERATORS) + using iterator = pointer; +#else + using iterator = __wrap_iter; +#endif using reverse_iterator = _VSTD::reverse_iterator; static constexpr size_type extent = dynamic_extent; diff --git 
a/libcxx/test/libcxx/iterators/iterator.concepts/iterator.concept.input/subsumption.compile.pass.cpp b/libcxx/test/libcxx/iterators/iterator.concepts/iterator.concept.input/subsumption.compile.pass.cpp --- a/libcxx/test/libcxx/iterators/iterator.concepts/iterator.concept.input/subsumption.compile.pass.cpp +++ b/libcxx/test/libcxx/iterators/iterator.concepts/iterator.concept.input/subsumption.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // template // concept input_iterator; diff --git a/libcxx/test/libcxx/iterators/iterator.requirements/iterator.assoc.types/iterator.traits/legacy_bidirectional_iterator.compile.pass.cpp b/libcxx/test/libcxx/iterators/iterator.requirements/iterator.assoc.types/iterator.traits/legacy_bidirectional_iterator.compile.pass.cpp --- a/libcxx/test/libcxx/iterators/iterator.requirements/iterator.assoc.types/iterator.traits/legacy_bidirectional_iterator.compile.pass.cpp +++ b/libcxx/test/libcxx/iterators/iterator.requirements/iterator.assoc.types/iterator.traits/legacy_bidirectional_iterator.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // template // concept __iterator_traits_detail::__cpp17_bidirectional_iterator; diff --git a/libcxx/test/libcxx/iterators/iterator.requirements/iterator.assoc.types/iterator.traits/legacy_forward_iterator.compile.pass.cpp b/libcxx/test/libcxx/iterators/iterator.requirements/iterator.assoc.types/iterator.traits/legacy_forward_iterator.compile.pass.cpp --- a/libcxx/test/libcxx/iterators/iterator.requirements/iterator.assoc.types/iterator.traits/legacy_forward_iterator.compile.pass.cpp +++ b/libcxx/test/libcxx/iterators/iterator.requirements/iterator.assoc.types/iterator.traits/legacy_forward_iterator.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // 
UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // template // concept __iterator_traits_detail::__cpp17_forward_iterator; diff --git a/libcxx/test/libcxx/iterators/iterator.requirements/iterator.assoc.types/iterator.traits/legacy_input_iterator.compile.pass.cpp b/libcxx/test/libcxx/iterators/iterator.requirements/iterator.assoc.types/iterator.traits/legacy_input_iterator.compile.pass.cpp --- a/libcxx/test/libcxx/iterators/iterator.requirements/iterator.assoc.types/iterator.traits/legacy_input_iterator.compile.pass.cpp +++ b/libcxx/test/libcxx/iterators/iterator.requirements/iterator.assoc.types/iterator.traits/legacy_input_iterator.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // template // concept __iterator_traits_detail::__cpp17_input_iterator; diff --git a/libcxx/test/libcxx/iterators/iterator.requirements/iterator.assoc.types/iterator.traits/legacy_iterator.compile.pass.cpp b/libcxx/test/libcxx/iterators/iterator.requirements/iterator.assoc.types/iterator.traits/legacy_iterator.compile.pass.cpp --- a/libcxx/test/libcxx/iterators/iterator.requirements/iterator.assoc.types/iterator.traits/legacy_iterator.compile.pass.cpp +++ b/libcxx/test/libcxx/iterators/iterator.requirements/iterator.assoc.types/iterator.traits/legacy_iterator.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // template // concept __iterator_traits_detail::__cpp17_iterator; diff --git a/libcxx/test/libcxx/iterators/iterator.requirements/iterator.assoc.types/iterator.traits/legacy_random_access_iterator.compile.pass.cpp b/libcxx/test/libcxx/iterators/iterator.requirements/iterator.assoc.types/iterator.traits/legacy_random_access_iterator.compile.pass.cpp --- 
a/libcxx/test/libcxx/iterators/iterator.requirements/iterator.assoc.types/iterator.traits/legacy_random_access_iterator.compile.pass.cpp +++ b/libcxx/test/libcxx/iterators/iterator.requirements/iterator.assoc.types/iterator.traits/legacy_random_access_iterator.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // template // concept __iterator_traits_detail::__cpp17_random_access_iterator; diff --git a/libcxx/test/libcxx/iterators/iterator.requirements/iterator.assoc.types/iterator.traits/locale_dependent.compile.pass.cpp b/libcxx/test/libcxx/iterators/iterator.requirements/iterator.assoc.types/iterator.traits/locale_dependent.compile.pass.cpp --- a/libcxx/test/libcxx/iterators/iterator.requirements/iterator.assoc.types/iterator.traits/locale_dependent.compile.pass.cpp +++ b/libcxx/test/libcxx/iterators/iterator.requirements/iterator.assoc.types/iterator.traits/locale_dependent.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // REQUIRES: locale.en_US.UTF-8 diff --git a/libcxx/test/libcxx/iterators/iterator.requirements/iterator.concepts/integer_like.compile.pass.cpp b/libcxx/test/libcxx/iterators/iterator.requirements/iterator.concepts/integer_like.compile.pass.cpp --- a/libcxx/test/libcxx/iterators/iterator.requirements/iterator.concepts/integer_like.compile.pass.cpp +++ b/libcxx/test/libcxx/iterators/iterator.requirements/iterator.concepts/integer_like.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang #include diff --git a/libcxx/test/std/containers/associative/map/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/associative/map/iterator_concept_conformance.compile.pass.cpp --- 
a/libcxx/test/std/containers/associative/map/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/associative/map/iterator_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // iterator, const_iterator, reverse_iterator, const_reverse_iterator diff --git a/libcxx/test/std/containers/associative/map/range_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/associative/map/range_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/containers/associative/map/range_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/associative/map/range_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // map diff --git a/libcxx/test/std/containers/associative/multimap/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/associative/multimap/iterator_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/containers/associative/multimap/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/associative/multimap/iterator_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // iterator, const_iterator, reverse_iterator, const_reverse_iterator diff --git a/libcxx/test/std/containers/associative/multimap/range_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/associative/multimap/range_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/containers/associative/multimap/range_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/associative/multimap/range_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: 
libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // multimap diff --git a/libcxx/test/std/containers/associative/multiset/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/associative/multiset/iterator_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/containers/associative/multiset/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/associative/multiset/iterator_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // iterator, const_iterator, reverse_iterator, const_reverse_iterator diff --git a/libcxx/test/std/containers/associative/multiset/range_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/associative/multiset/range_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/containers/associative/multiset/range_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/associative/multiset/range_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // multiset diff --git a/libcxx/test/std/containers/associative/set/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/associative/set/iterator_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/containers/associative/set/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/associative/set/iterator_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // iterator, const_iterator, reverse_iterator, const_reverse_iterator diff --git a/libcxx/test/std/containers/associative/set/range_concept_conformance.compile.pass.cpp 
b/libcxx/test/std/containers/associative/set/range_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/containers/associative/set/range_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/associative/set/range_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // set diff --git a/libcxx/test/std/containers/sequences/array/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/sequences/array/iterator_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/containers/sequences/array/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/sequences/array/iterator_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // iterator, const_iterator, reverse_iterator, const_reverse_iterator diff --git a/libcxx/test/std/containers/sequences/array/range_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/sequences/array/range_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/containers/sequences/array/range_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/sequences/array/range_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // array diff --git a/libcxx/test/std/containers/sequences/deque/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/sequences/deque/iterator_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/containers/sequences/deque/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/sequences/deque/iterator_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: 
libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // iterator, const_iterator, reverse_iterator, const_reverse_iterator diff --git a/libcxx/test/std/containers/sequences/deque/range_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/sequences/deque/range_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/containers/sequences/deque/range_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/sequences/deque/range_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // deque diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.iter/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.iter/iterator_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.iter/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.iter/iterator_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // iterator, const_iterator diff --git a/libcxx/test/std/containers/sequences/forwardlist/range_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/range_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/containers/sequences/forwardlist/range_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/sequences/forwardlist/range_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // forward_list diff --git a/libcxx/test/std/containers/sequences/list/iterator_concept_conformance.compile.pass.cpp 
b/libcxx/test/std/containers/sequences/list/iterator_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/containers/sequences/list/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/iterator_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // iterator, const_iterator, reverse_iterator, const_reverse_iterator diff --git a/libcxx/test/std/containers/sequences/list/range_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/sequences/list/range_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/containers/sequences/list/range_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/range_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // list diff --git a/libcxx/test/std/containers/sequences/vector.bool/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/iterator_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/containers/sequences/vector.bool/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector.bool/iterator_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // iterator, const_iterator, reverse_iterator, const_reverse_iterator diff --git a/libcxx/test/std/containers/sequences/vector.bool/range_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/range_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/containers/sequences/vector.bool/range_concept_conformance.compile.pass.cpp +++ 
b/libcxx/test/std/containers/sequences/vector.bool/range_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // vector diff --git a/libcxx/test/std/containers/sequences/vector/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/sequences/vector/iterator_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/containers/sequences/vector/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector/iterator_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // iterator, const_iterator, reverse_iterator, const_reverse_iterator diff --git a/libcxx/test/std/containers/sequences/vector/range_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/sequences/vector/range_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/containers/sequences/vector/range_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector/range_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // vector diff --git a/libcxx/test/std/containers/unord/unord.map/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/iterator_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/containers/unord/unord.map/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.map/iterator_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // iterator, const_iterator, local_iterator, const_local_iterator diff --git 
a/libcxx/test/std/containers/unord/unord.map/range_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/range_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/containers/unord/unord.map/range_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.map/range_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // unordered_map diff --git a/libcxx/test/std/containers/unord/unord.multimap/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/iterator_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/containers/unord/unord.multimap/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.multimap/iterator_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // iterator, const_iterator, local_iterator, const_local_iterator diff --git a/libcxx/test/std/containers/unord/unord.multimap/range_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/range_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/containers/unord/unord.multimap/range_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.multimap/range_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // unordered_multimap diff --git a/libcxx/test/std/containers/unord/unord.multiset/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/iterator_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/containers/unord/unord.multiset/iterator_concept_conformance.compile.pass.cpp +++ 
b/libcxx/test/std/containers/unord/unord.multiset/iterator_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // iterator, const_iterator, local_iterator, const_local_iterator diff --git a/libcxx/test/std/containers/unord/unord.multiset/range_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/range_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/containers/unord/unord.multiset/range_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.multiset/range_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // unordered_multiset diff --git a/libcxx/test/std/containers/unord/unord.set/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.set/iterator_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/containers/unord/unord.set/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.set/iterator_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // iterator, const_iterator, local_iterator, const_local_iterator diff --git a/libcxx/test/std/containers/unord/unord.set/range_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.set/range_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/containers/unord/unord.set/range_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.set/range_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // unordered_multiset diff --git 
a/libcxx/test/std/containers/views/enable_borrowed_range.compile.pass.cpp b/libcxx/test/std/containers/views/enable_borrowed_range.compile.pass.cpp --- a/libcxx/test/std/containers/views/enable_borrowed_range.compile.pass.cpp +++ b/libcxx/test/std/containers/views/enable_borrowed_range.compile.pass.cpp @@ -8,7 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts -// XFAIL: msvc && clang // diff --git a/libcxx/test/std/containers/views/range_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/views/range_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/containers/views/range_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/views/range_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // span diff --git a/libcxx/test/std/containers/views/span.cons/deduct.pass.cpp b/libcxx/test/std/containers/views/span.cons/deduct.pass.cpp --- a/libcxx/test/std/containers/views/span.cons/deduct.pass.cpp +++ b/libcxx/test/std/containers/views/span.cons/deduct.pass.cpp @@ -7,7 +7,6 @@ // //===---------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17 -// XFAIL: LIBCXX-DEBUG-FIXME // diff --git a/libcxx/test/std/containers/views/span.iterators/begin.pass.cpp b/libcxx/test/std/containers/views/span.iterators/begin.pass.cpp --- a/libcxx/test/std/containers/views/span.iterators/begin.pass.cpp +++ b/libcxx/test/std/containers/views/span.iterators/begin.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17 -// XFAIL: LIBCXX-DEBUG-FIXME // diff --git a/libcxx/test/std/containers/views/span.iterators/end.pass.cpp b/libcxx/test/std/containers/views/span.iterators/end.pass.cpp --- 
a/libcxx/test/std/containers/views/span.iterators/end.pass.cpp +++ b/libcxx/test/std/containers/views/span.iterators/end.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17 -// XFAIL: LIBCXX-DEBUG-FIXME // diff --git a/libcxx/test/std/containers/views/span.iterators/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/views/span.iterators/iterator_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/containers/views/span.iterators/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/views/span.iterators/iterator_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // iterator, reverse_iterator diff --git a/libcxx/test/std/containers/views/span.iterators/rbegin.pass.cpp b/libcxx/test/std/containers/views/span.iterators/rbegin.pass.cpp --- a/libcxx/test/std/containers/views/span.iterators/rbegin.pass.cpp +++ b/libcxx/test/std/containers/views/span.iterators/rbegin.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17 -// XFAIL: LIBCXX-DEBUG-FIXME // diff --git a/libcxx/test/std/containers/views/span.iterators/rend.pass.cpp b/libcxx/test/std/containers/views/span.iterators/rend.pass.cpp --- a/libcxx/test/std/containers/views/span.iterators/rend.pass.cpp +++ b/libcxx/test/std/containers/views/span.iterators/rend.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17 -// XFAIL: LIBCXX-DEBUG-FIXME // diff --git a/libcxx/test/std/containers/views/span.sub/first.pass.cpp b/libcxx/test/std/containers/views/span.sub/first.pass.cpp --- a/libcxx/test/std/containers/views/span.sub/first.pass.cpp +++ 
b/libcxx/test/std/containers/views/span.sub/first.pass.cpp @@ -7,7 +7,6 @@ // //===---------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17 -// XFAIL: LIBCXX-DEBUG-FIXME // diff --git a/libcxx/test/std/containers/views/span.sub/last.pass.cpp b/libcxx/test/std/containers/views/span.sub/last.pass.cpp --- a/libcxx/test/std/containers/views/span.sub/last.pass.cpp +++ b/libcxx/test/std/containers/views/span.sub/last.pass.cpp @@ -7,7 +7,6 @@ // //===---------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17 -// XFAIL: LIBCXX-DEBUG-FIXME // diff --git a/libcxx/test/std/containers/views/span.sub/subspan.pass.cpp b/libcxx/test/std/containers/views/span.sub/subspan.pass.cpp --- a/libcxx/test/std/containers/views/span.sub/subspan.pass.cpp +++ b/libcxx/test/std/containers/views/span.sub/subspan.pass.cpp @@ -7,7 +7,6 @@ // //===---------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17 -// XFAIL: LIBCXX-DEBUG-FIXME // diff --git a/libcxx/test/std/input.output/filesystems/class.directory_iterator/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_iterator/iterator_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/input.output/filesystems/class.directory_iterator/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_iterator/iterator_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // directory_iterator, recursive_directory_iterator diff --git a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.append.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.append.pass.cpp --- 
a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.append.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.append.pass.cpp @@ -11,8 +11,6 @@ // These tests require locale for non-char paths // UNSUPPORTED: libcpp-has-no-localization -// XFAIL: LIBCXX-WINDOWS-FIXME - // // class path @@ -197,6 +195,9 @@ // required. // On Windows, the append method is more complex and uses intermediate // path objects, which causes extra allocations. + // In DLL builds on Windows, the overridden operator new won't pick up + // allocations done within the DLL, so the RequireAllocationGuard below + // won't necessarily see allocations in the cases where they're expected. #ifdef _WIN32 bool DisableAllocations = false; #else @@ -208,6 +209,7 @@ { RequireAllocationGuard g; // requires 1 or more allocations occur by default if (DisableAllocations) g.requireExactly(0); + else TEST_ONLY_WIN32_DLL(g.requireAtLeast(0)); LHS /= RHS; } assert(PathEq(LHS, E)); @@ -219,6 +221,7 @@ { RequireAllocationGuard g; if (DisableAllocations) g.requireExactly(0); + else TEST_ONLY_WIN32_DLL(g.requireAtLeast(0)); LHS.append(RHS, REnd); } assert(PathEq(LHS, E)); diff --git a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/move.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/move.pass.cpp --- a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/move.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/move.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03 -// XFAIL: LIBCXX-WINDOWS-FIXME - // // class path @@ -30,7 +28,9 @@ assert(globalMemCounter.checkOutstandingNewEq(0)); const std::string s("we really really really really really really really " "really really long string so that we allocate"); - assert(globalMemCounter.checkOutstandingNewEq(1)); + // On windows, the operator new from count_new.h can't override the default + // 
operator for calls within the libc++ DLL. + TEST_NOT_WIN32_DLL(assert(globalMemCounter.checkOutstandingNewEq(1))); const fs::path::string_type ps(s.begin(), s.end()); path p(s); { diff --git a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.concat.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.concat.pass.cpp --- a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.concat.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.concat.pass.cpp @@ -11,8 +11,6 @@ // These tests require locale for non-char paths // UNSUPPORTED: libcpp-has-no-localization -// XFAIL: LIBCXX-WINDOWS-FIXME - // // class path @@ -142,6 +140,10 @@ // code_cvt conversions. // For the path native type, no allocations will be performed because no // conversion is required. + + // In DLL builds on Windows, the overridden operator new won't pick up + // allocations done within the DLL, so the RequireAllocationGuard below + // won't necessarily see allocations in the cases where they're expected. 
bool DisableAllocations = std::is_same::value; { path LHS(L); PathReserve(LHS, ReserveSize); @@ -149,6 +151,7 @@ { RequireAllocationGuard g; // requires 1 or more allocations occur by default if (DisableAllocations) g.requireExactly(0); + else TEST_ONLY_WIN32_DLL(g.requireAtLeast(0)); LHS += RHS; } assert(LHS == E); @@ -160,6 +163,7 @@ { RequireAllocationGuard g; if (DisableAllocations) g.requireExactly(0); + else TEST_ONLY_WIN32_DLL(g.requireAtLeast(0)); LHS.concat(RHS, REnd); } assert(LHS == E); diff --git a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/move.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/move.pass.cpp --- a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/move.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/move.pass.cpp @@ -8,8 +8,6 @@ // UNSUPPORTED: c++03 -// XFAIL: LIBCXX-WINDOWS-FIXME - // // class path @@ -30,7 +28,9 @@ assert(globalMemCounter.checkOutstandingNewEq(0)); const std::string s("we really really really really really really really " "really really long string so that we allocate"); - assert(globalMemCounter.checkOutstandingNewEq(1)); + // On windows, the operator new from count_new.h can't override the default + // operator for calls within the libc++ DLL. 
+ TEST_NOT_WIN32_DLL(assert(globalMemCounter.checkOutstandingNewEq(1))); const fs::path::string_type ps(s.begin(), s.end()); path p(s); { diff --git a/libcxx/test/std/input.output/filesystems/class.path/range_concept_conformance.compile.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/range_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/input.output/filesystems/class.path/range_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.path/range_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // path diff --git a/libcxx/test/std/iterators/iterator.primitives/iterator.traits/cxx20_iterator_traits.compile.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/iterator.traits/cxx20_iterator_traits.compile.pass.cpp --- a/libcxx/test/std/iterators/iterator.primitives/iterator.traits/cxx20_iterator_traits.compile.pass.cpp +++ b/libcxx/test/std/iterators/iterator.primitives/iterator.traits/cxx20_iterator_traits.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // template // struct iterator_traits; diff --git a/libcxx/test/std/iterators/iterator.primitives/iterator.traits/iter_reference_t.compile.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/iterator.traits/iter_reference_t.compile.pass.cpp --- a/libcxx/test/std/iterators/iterator.primitives/iterator.traits/iter_reference_t.compile.pass.cpp +++ b/libcxx/test/std/iterators/iterator.primitives/iterator.traits/iter_reference_t.compile.pass.cpp @@ -8,7 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts -// XFAIL: msvc && clang // template // using iter_reference_t = decltype(*declval()); diff --git 
a/libcxx/test/std/iterators/iterator.requirements/iterator.assoc.types/incrementable.traits/incrementable_traits.compile.pass.cpp b/libcxx/test/std/iterators/iterator.requirements/iterator.assoc.types/incrementable.traits/incrementable_traits.compile.pass.cpp --- a/libcxx/test/std/iterators/iterator.requirements/iterator.assoc.types/incrementable.traits/incrementable_traits.compile.pass.cpp +++ b/libcxx/test/std/iterators/iterator.requirements/iterator.assoc.types/incrementable.traits/incrementable_traits.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // template // struct incrementable_traits; diff --git a/libcxx/test/std/iterators/iterator.requirements/iterator.assoc.types/incrementable.traits/iter_difference_t.pass.cpp b/libcxx/test/std/iterators/iterator.requirements/iterator.assoc.types/incrementable.traits/iter_difference_t.pass.cpp --- a/libcxx/test/std/iterators/iterator.requirements/iterator.assoc.types/incrementable.traits/iter_difference_t.pass.cpp +++ b/libcxx/test/std/iterators/iterator.requirements/iterator.assoc.types/incrementable.traits/iter_difference_t.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // template // using iter_difference_t; diff --git a/libcxx/test/std/iterators/iterator.requirements/iterator.assoc.types/readable.traits/indirectly_readable_traits.compile.pass.cpp b/libcxx/test/std/iterators/iterator.requirements/iterator.assoc.types/readable.traits/indirectly_readable_traits.compile.pass.cpp --- a/libcxx/test/std/iterators/iterator.requirements/iterator.assoc.types/readable.traits/indirectly_readable_traits.compile.pass.cpp +++ b/libcxx/test/std/iterators/iterator.requirements/iterator.assoc.types/readable.traits/indirectly_readable_traits.compile.pass.cpp @@ -8,7 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // 
UNSUPPORTED: libcpp-no-concepts -// XFAIL: msvc && clang // template // struct indirectly_readable_traits; diff --git a/libcxx/test/std/iterators/iterator.requirements/iterator.assoc.types/readable.traits/iter_value_t.pass.cpp b/libcxx/test/std/iterators/iterator.requirements/iterator.assoc.types/readable.traits/iter_value_t.pass.cpp --- a/libcxx/test/std/iterators/iterator.requirements/iterator.assoc.types/readable.traits/iter_value_t.pass.cpp +++ b/libcxx/test/std/iterators/iterator.requirements/iterator.assoc.types/readable.traits/iter_value_t.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // template // using iter_value_t; diff --git a/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.inc/incrementable.compile.pass.cpp b/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.inc/incrementable.compile.pass.cpp --- a/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.inc/incrementable.compile.pass.cpp +++ b/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.inc/incrementable.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // template // concept indirectly_readable; diff --git a/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.inc/subsumption.compile.pass.cpp b/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.inc/subsumption.compile.pass.cpp --- a/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.inc/subsumption.compile.pass.cpp +++ b/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.inc/subsumption.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: 
libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // template // concept incrementable; diff --git a/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.input/input_iterator.compile.pass.cpp b/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.input/input_iterator.compile.pass.cpp --- a/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.input/input_iterator.compile.pass.cpp +++ b/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.input/input_iterator.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // template // concept input_iterator; diff --git a/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.iterator/input_or_output_iterator.compile.pass.cpp b/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.iterator/input_or_output_iterator.compile.pass.cpp --- a/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.iterator/input_or_output_iterator.compile.pass.cpp +++ b/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.iterator/input_or_output_iterator.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // template // concept input_or_output_iterator; diff --git a/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.iterator/subsumption.compile.pass.cpp b/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.iterator/subsumption.compile.pass.cpp --- a/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.iterator/subsumption.compile.pass.cpp +++ 
b/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.iterator/subsumption.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // template // concept input_or_output_iterator; diff --git a/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.readable/indirectly_readable.compile.pass.cpp b/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.readable/indirectly_readable.compile.pass.cpp --- a/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.readable/indirectly_readable.compile.pass.cpp +++ b/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.readable/indirectly_readable.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // template // concept indirectly_readable; diff --git a/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.sentinel/sentinel_for.compile.pass.cpp b/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.sentinel/sentinel_for.compile.pass.cpp --- a/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.sentinel/sentinel_for.compile.pass.cpp +++ b/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.sentinel/sentinel_for.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // template // concept sentinel_for; diff --git a/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.sentinel/sentinel_for.subsumption.compile.pass.cpp 
b/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.sentinel/sentinel_for.subsumption.compile.pass.cpp --- a/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.sentinel/sentinel_for.subsumption.compile.pass.cpp +++ b/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.sentinel/sentinel_for.subsumption.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // template // concept sentinel_for; diff --git a/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.sentinel/sized_sentinel_for.compile.pass.cpp b/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.sentinel/sized_sentinel_for.compile.pass.cpp --- a/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.sentinel/sized_sentinel_for.compile.pass.cpp +++ b/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.sentinel/sized_sentinel_for.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // [iterator.concept.sizedsentinel], concept sized_sentinel_for // diff --git a/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.winc/subsumption.compile.pass.cpp b/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.winc/subsumption.compile.pass.cpp --- a/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.winc/subsumption.compile.pass.cpp +++ b/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.winc/subsumption.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // template // 
concept weakly_incrementable; diff --git a/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.winc/weakly_incrementable.compile.pass.cpp b/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.winc/weakly_incrementable.compile.pass.cpp --- a/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.winc/weakly_incrementable.compile.pass.cpp +++ b/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.winc/weakly_incrementable.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // template // concept std::weakly_incrementable; diff --git a/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.writable/indirectly_writable.compile.pass.cpp b/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.writable/indirectly_writable.compile.pass.cpp --- a/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.writable/indirectly_writable.compile.pass.cpp +++ b/libcxx/test/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.writable/indirectly_writable.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // template // concept indirectly_writable; diff --git a/libcxx/test/std/iterators/iterator.requirements/iterator.cust/iterator.cust.move/iter_move.nodiscard.verify.cpp b/libcxx/test/std/iterators/iterator.requirements/iterator.cust/iterator.cust.move/iter_move.nodiscard.verify.cpp --- a/libcxx/test/std/iterators/iterator.requirements/iterator.cust/iterator.cust.move/iter_move.nodiscard.verify.cpp +++ b/libcxx/test/std/iterators/iterator.requirements/iterator.cust/iterator.cust.move/iter_move.nodiscard.verify.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, 
c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // Test the [[nodiscard]] extension in libc++. // REQUIRES: libc++ diff --git a/libcxx/test/std/iterators/iterator.requirements/iterator.cust/iterator.cust.move/iter_move.pass.cpp b/libcxx/test/std/iterators/iterator.requirements/iterator.cust/iterator.cust.move/iter_move.pass.cpp --- a/libcxx/test/std/iterators/iterator.requirements/iterator.cust/iterator.cust.move/iter_move.pass.cpp +++ b/libcxx/test/std/iterators/iterator.requirements/iterator.cust/iterator.cust.move/iter_move.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // template // unspecified iter_move; diff --git a/libcxx/test/std/iterators/iterator.requirements/iterator.cust/iterator.cust.move/iter_rvalue_reference_t.pass.cpp b/libcxx/test/std/iterators/iterator.requirements/iterator.cust/iterator.cust.move/iter_rvalue_reference_t.pass.cpp --- a/libcxx/test/std/iterators/iterator.requirements/iterator.cust/iterator.cust.move/iter_rvalue_reference_t.pass.cpp +++ b/libcxx/test/std/iterators/iterator.requirements/iterator.cust/iterator.cust.move/iter_rvalue_reference_t.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // template // using iter_rvalue_reference; diff --git a/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iterator/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iterator/iterator_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iterator/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iterator/iterator_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // 
UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // back_insert_iterator diff --git a/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iterator/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iterator/iterator_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iterator/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iterator/iterator_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // front_insert_iterator diff --git a/libcxx/test/std/iterators/predef.iterators/insert.iterators/insert.iterator/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/iterators/predef.iterators/insert.iterators/insert.iterator/iterator_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/iterators/predef.iterators/insert.iterators/insert.iterator/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/insert.iterators/insert.iterator/iterator_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // insert_iterator diff --git a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iterator/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iterator/iterator_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iterator/iterator_concept_conformance.compile.pass.cpp +++ 
b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iterator/iterator_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // move_iterator diff --git a/libcxx/test/std/iterators/predef.iterators/reverse.iterators/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/iterators/predef.iterators/reverse.iterators/iterator_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/iterators/predef.iterators/reverse.iterators/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/reverse.iterators/iterator_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // reverse_iterator diff --git a/libcxx/test/std/iterators/stream.iterators/istream.iterator/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/iterators/stream.iterators/istream.iterator/iterator_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/iterators/stream.iterators/istream.iterator/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/iterators/stream.iterators/istream.iterator/iterator_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // istream_iterator diff --git a/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/iterator_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/iterator_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // 
UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // istreambuf_iterator diff --git a/libcxx/test/std/iterators/stream.iterators/ostream.iterator/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/iterators/stream.iterators/ostream.iterator/iterator_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/iterators/stream.iterators/ostream.iterator/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/iterators/stream.iterators/ostream.iterator/iterator_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // ostream_iterator diff --git a/libcxx/test/std/iterators/stream.iterators/ostreambuf.iterator/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/iterators/stream.iterators/ostreambuf.iterator/iterator_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/iterators/stream.iterators/ostreambuf.iterator/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/iterators/stream.iterators/ostreambuf.iterator/iterator_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // ostreambuf_iterator diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.dtor/dtor.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.dtor/dtor.pass.cpp --- a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.dtor/dtor.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.dtor/dtor.pass.cpp @@ -12,8 +12,6 @@ // ~ctype(); -// XFAIL: LIBCXX-WINDOWS-FIXME - #include #include @@ -39,7 +37,9 @@ new 
std::ctype(new std::ctype::mask[256], true)); assert(globalMemCounter.checkDeleteArrayCalledEq(0)); } - assert(globalMemCounter.checkDeleteArrayCalledEq(1)); + // On windows, the operator new from count_new.h can't override the default + // operator for calls within the libc++ DLL. + TEST_NOT_WIN32_DLL(assert(globalMemCounter.checkDeleteArrayCalledEq(1))); return 0; } diff --git a/libcxx/test/std/ranges/range.access/range.access.begin/begin.pass.cpp b/libcxx/test/std/ranges/range.access/range.access.begin/begin.pass.cpp --- a/libcxx/test/std/ranges/range.access/range.access.begin/begin.pass.cpp +++ b/libcxx/test/std/ranges/range.access/range.access.begin/begin.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // std::ranges::begin diff --git a/libcxx/test/std/ranges/range.access/range.access.begin/incomplete.compile.verify.cpp b/libcxx/test/std/ranges/range.access/range.access.begin/incomplete.compile.verify.cpp --- a/libcxx/test/std/ranges/range.access/range.access.begin/incomplete.compile.verify.cpp +++ b/libcxx/test/std/ranges/range.access/range.access.begin/incomplete.compile.verify.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // unspecified begin; diff --git a/libcxx/test/std/ranges/range.access/range.access.cbegin/incomplete.compile.verify copy.cpp b/libcxx/test/std/ranges/range.access/range.access.cbegin/incomplete.compile.verify copy.cpp --- a/libcxx/test/std/ranges/range.access/range.access.cbegin/incomplete.compile.verify copy.cpp +++ b/libcxx/test/std/ranges/range.access/range.access.cbegin/incomplete.compile.verify copy.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // unspecified begin; diff --git 
a/libcxx/test/std/ranges/range.access/range.access.cbegin/incomplete.compile.verify.cpp b/libcxx/test/std/ranges/range.access/range.access.cbegin/incomplete.compile.verify.cpp --- a/libcxx/test/std/ranges/range.access/range.access.cbegin/incomplete.compile.verify.cpp +++ b/libcxx/test/std/ranges/range.access/range.access.cbegin/incomplete.compile.verify.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // ranges::cbegin; diff --git a/libcxx/test/std/ranges/range.access/range.access.cend/incomplete.compile.verify.cpp b/libcxx/test/std/ranges/range.access/range.access.cend/incomplete.compile.verify.cpp --- a/libcxx/test/std/ranges/range.access/range.access.cend/incomplete.compile.verify.cpp +++ b/libcxx/test/std/ranges/range.access/range.access.cend/incomplete.compile.verify.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // unspecified begin; diff --git a/libcxx/test/std/ranges/range.access/range.access.end/end.cpp b/libcxx/test/std/ranges/range.access/range.access.end/end.cpp --- a/libcxx/test/std/ranges/range.access/range.access.end/end.cpp +++ b/libcxx/test/std/ranges/range.access/range.access.end/end.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // std::ranges::end diff --git a/libcxx/test/std/ranges/range.access/range.access.end/incomplete.compile.verify.cpp b/libcxx/test/std/ranges/range.access/range.access.end/incomplete.compile.verify.cpp --- a/libcxx/test/std/ranges/range.access/range.access.end/incomplete.compile.verify.cpp +++ b/libcxx/test/std/ranges/range.access/range.access.end/incomplete.compile.verify.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // unspecified 
begin; diff --git a/libcxx/test/std/ranges/range.range/enable_borrowed_range.compile.pass.cpp b/libcxx/test/std/ranges/range.range/enable_borrowed_range.compile.pass.cpp --- a/libcxx/test/std/ranges/range.range/enable_borrowed_range.compile.pass.cpp +++ b/libcxx/test/std/ranges/range.range/enable_borrowed_range.compile.pass.cpp @@ -8,7 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts -// XFAIL: msvc && clang // diff --git a/libcxx/test/std/ranges/range.range/helper_aliases.compile.pass.cpp b/libcxx/test/std/ranges/range.range/helper_aliases.compile.pass.cpp --- a/libcxx/test/std/ranges/range.range/helper_aliases.compile.pass.cpp +++ b/libcxx/test/std/ranges/range.range/helper_aliases.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // template // using range_difference_t = iter_difference_t>; diff --git a/libcxx/test/std/ranges/range.range/iterator_t.compile.pass.cpp b/libcxx/test/std/ranges/range.range/iterator_t.compile.pass.cpp --- a/libcxx/test/std/ranges/range.range/iterator_t.compile.pass.cpp +++ b/libcxx/test/std/ranges/range.range/iterator_t.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // template // using iterator_t = decltype(ranges::begin(declval())); diff --git a/libcxx/test/std/ranges/range.range/range.compile.pass.cpp b/libcxx/test/std/ranges/range.range/range.compile.pass.cpp --- a/libcxx/test/std/ranges/range.range/range.compile.pass.cpp +++ b/libcxx/test/std/ranges/range.range/range.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // template // concept range; diff --git a/libcxx/test/std/ranges/range.range/sentinel_t.compile.pass.cpp 
b/libcxx/test/std/ranges/range.range/sentinel_t.compile.pass.cpp --- a/libcxx/test/std/ranges/range.range/sentinel_t.compile.pass.cpp +++ b/libcxx/test/std/ranges/range.range/sentinel_t.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // template // using sentinel_t = decltype(ranges::end(declval<_Rp&>())); diff --git a/libcxx/test/std/ranges/range.refinements/common_range.compile.pass.cpp b/libcxx/test/std/ranges/range.refinements/common_range.compile.pass.cpp --- a/libcxx/test/std/ranges/range.refinements/common_range.compile.pass.cpp +++ b/libcxx/test/std/ranges/range.refinements/common_range.compile.pass.cpp @@ -8,7 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts -// XFAIL: msvc && clang // template // concept common_range; diff --git a/libcxx/test/std/ranges/range.refinements/input_range.compile.pass.cpp b/libcxx/test/std/ranges/range.refinements/input_range.compile.pass.cpp --- a/libcxx/test/std/ranges/range.refinements/input_range.compile.pass.cpp +++ b/libcxx/test/std/ranges/range.refinements/input_range.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // template // concept input_range; diff --git a/libcxx/test/std/ranges/range.refinements/subsumption.compile.pass.cpp b/libcxx/test/std/ranges/range.refinements/subsumption.compile.pass.cpp --- a/libcxx/test/std/ranges/range.refinements/subsumption.compile.pass.cpp +++ b/libcxx/test/std/ranges/range.refinements/subsumption.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // template // concept input_iterator; diff --git a/libcxx/test/std/re/re.iter/re.regiter/iterator_concept_conformance.compile.pass.cpp 
b/libcxx/test/std/re/re.iter/re.regiter/iterator_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/re/re.iter/re.regiter/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/re/re.iter/re.regiter/iterator_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // regex_iterator diff --git a/libcxx/test/std/re/re.iter/re.tokiter/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/re/re.iter/re.tokiter/iterator_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/re/re.iter/re.tokiter/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/re/re.iter/re.tokiter/iterator_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // regex_token_iterator diff --git a/libcxx/test/std/re/re.results/range_concept_conformance.compile.pass.cpp b/libcxx/test/std/re/re.results/range_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/re/re.results/range_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/re/re.results/range_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // match_results diff --git a/libcxx/test/std/strings/basic.string/range_concept_conformance.compile.pass.cpp b/libcxx/test/std/strings/basic.string/range_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/strings/basic.string/range_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/strings/basic.string/range_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // string diff --git 
a/libcxx/test/std/strings/basic.string/string.iterators/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/strings/basic.string/string.iterators/iterator_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/strings/basic.string/string.iterators/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/strings/basic.string/string.iterators/iterator_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // iterator, const_iterator, reverse_iterator, const_reverse_iterator diff --git a/libcxx/test/std/strings/string.view/enable_borrowed_range.compile.pass.cpp b/libcxx/test/std/strings/string.view/enable_borrowed_range.compile.pass.cpp --- a/libcxx/test/std/strings/string.view/enable_borrowed_range.compile.pass.cpp +++ b/libcxx/test/std/strings/string.view/enable_borrowed_range.compile.pass.cpp @@ -8,7 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts -// XFAIL: msvc && clang // diff --git a/libcxx/test/std/strings/string.view/range_concept_conformance.compile.pass.cpp b/libcxx/test/std/strings/string.view/range_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/strings/string.view/range_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/strings/string.view/range_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // string_view diff --git a/libcxx/test/std/strings/string.view/string.view.iterators/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/strings/string.view/string.view.iterators/iterator_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/strings/string.view/string.view.iterators/iterator_concept_conformance.compile.pass.cpp +++ 
b/libcxx/test/std/strings/string.view/string.view.iterators/iterator_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // iterator, const_iterator, reverse_iterator, const_reverse_iterator diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/F.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/F.pass.cpp --- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/F.pass.cpp +++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/F.pass.cpp @@ -8,8 +8,6 @@ // // UNSUPPORTED: libcpp-has-no-threads -// XFAIL: LIBCXX-WINDOWS-FIXME - // // class thread @@ -138,7 +136,7 @@ for (int i=0; i <= numAllocs; ++i) { throw_one = i; f_run = false; - unsigned old_outstanding = outstanding_new; + TEST_NOT_WIN32_DLL(unsigned old_outstanding = outstanding_new); try { std::thread t(f); assert(i == numAllocs); // Only final iteration will not throw. @@ -148,7 +146,9 @@ assert(i < numAllocs); assert(!f_run); // (2.2) } - assert(old_outstanding == outstanding_new); // (2.3) + // In DLL builds on Windows, the overridden operators new/delete won't + // override calls from within the DLL, so this won't match. 
+ TEST_NOT_WIN32_DLL(assert(old_outstanding == outstanding_new)); // (2.3) } f_run = false; throw_one = 0xFFF; diff --git a/libcxx/test/std/utilities/function.objects/range.cmp/equal_to.pass.cpp b/libcxx/test/std/utilities/function.objects/range.cmp/equal_to.pass.cpp --- a/libcxx/test/std/utilities/function.objects/range.cmp/equal_to.pass.cpp +++ b/libcxx/test/std/utilities/function.objects/range.cmp/equal_to.pass.cpp @@ -8,7 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts -// XFAIL: msvc && clang // diff --git a/libcxx/test/std/utilities/function.objects/range.cmp/greater.pass.cpp b/libcxx/test/std/utilities/function.objects/range.cmp/greater.pass.cpp --- a/libcxx/test/std/utilities/function.objects/range.cmp/greater.pass.cpp +++ b/libcxx/test/std/utilities/function.objects/range.cmp/greater.pass.cpp @@ -8,7 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts -// XFAIL: msvc && clang // diff --git a/libcxx/test/std/utilities/function.objects/range.cmp/greater_equal.pass.cpp b/libcxx/test/std/utilities/function.objects/range.cmp/greater_equal.pass.cpp --- a/libcxx/test/std/utilities/function.objects/range.cmp/greater_equal.pass.cpp +++ b/libcxx/test/std/utilities/function.objects/range.cmp/greater_equal.pass.cpp @@ -8,7 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts -// XFAIL: msvc && clang // diff --git a/libcxx/test/std/utilities/function.objects/range.cmp/less.pass.cpp b/libcxx/test/std/utilities/function.objects/range.cmp/less.pass.cpp --- a/libcxx/test/std/utilities/function.objects/range.cmp/less.pass.cpp +++ b/libcxx/test/std/utilities/function.objects/range.cmp/less.pass.cpp @@ -8,7 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts -// XFAIL: msvc && clang // diff --git a/libcxx/test/std/utilities/function.objects/range.cmp/less_equal.pass.cpp 
b/libcxx/test/std/utilities/function.objects/range.cmp/less_equal.pass.cpp --- a/libcxx/test/std/utilities/function.objects/range.cmp/less_equal.pass.cpp +++ b/libcxx/test/std/utilities/function.objects/range.cmp/less_equal.pass.cpp @@ -8,7 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts -// XFAIL: msvc && clang // diff --git a/libcxx/test/std/utilities/function.objects/range.cmp/not_equal_to.pass.cpp b/libcxx/test/std/utilities/function.objects/range.cmp/not_equal_to.pass.cpp --- a/libcxx/test/std/utilities/function.objects/range.cmp/not_equal_to.pass.cpp +++ b/libcxx/test/std/utilities/function.objects/range.cmp/not_equal_to.pass.cpp @@ -8,7 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts -// XFAIL: msvc && clang // diff --git a/libcxx/test/std/utilities/memory/unique.ptr/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/utilities/memory/unique.ptr/iterator_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/utilities/memory/unique.ptr/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/utilities/memory/unique.ptr/iterator_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // unique_ptr diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/iterator_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/iterator_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // shared_ptr diff --git 
a/libcxx/test/std/utilities/optional/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/utilities/optional/iterator_concept_conformance.compile.pass.cpp --- a/libcxx/test/std/utilities/optional/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/utilities/optional/iterator_concept_conformance.compile.pass.cpp @@ -9,7 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: gcc-10 -// XFAIL: msvc && clang // unique_ptr diff --git a/libcxx/test/support/nasty_macros.h b/libcxx/test/support/nasty_macros.h --- a/libcxx/test/support/nasty_macros.h +++ b/libcxx/test/support/nasty_macros.h @@ -56,7 +56,13 @@ #ifndef _WIN32 #define __allocator NASTY_MACRO #define __deallocate NASTY_MACRO +#define __deref NASTY_MACRO +#define __full NASTY_MACRO +#define __in NASTY_MACRO +#define __inout NASTY_MACRO +#define __nz NASTY_MACRO #define __out NASTY_MACRO +#define __part NASTY_MACRO #endif #define __output NASTY_MACRO diff --git a/libcxx/test/support/test_macros.h b/libcxx/test/support/test_macros.h --- a/libcxx/test/support/test_macros.h +++ b/libcxx/test/support/test_macros.h @@ -382,6 +382,14 @@ #define TEST_NOT_WIN32(...) __VA_ARGS__ #endif +#if defined(_WIN32) && !defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS) +#define TEST_NOT_WIN32_DLL(...) ((void)0) +#define TEST_ONLY_WIN32_DLL(...) __VA_ARGS__ +#else +#define TEST_NOT_WIN32_DLL(...) __VA_ARGS__ +#define TEST_ONLY_WIN32_DLL(...) 
((void)0) +#endif + #ifdef _WIN32 #define TEST_WIN_NO_FILESYSTEM_PERMS_NONE #endif diff --git a/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_arm64.cpp b/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_arm64.cpp --- a/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_arm64.cpp +++ b/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_arm64.cpp @@ -37,14 +37,12 @@ NativeRegisterContextFreeBSD_arm64::NativeRegisterContextFreeBSD_arm64( const ArchSpec &target_arch, NativeThreadProtocol &native_thread) : NativeRegisterContextRegisterInfo( - native_thread, new RegisterInfoPOSIX_arm64(target_arch)) + native_thread, new RegisterInfoPOSIX_arm64(target_arch, 0)) #ifdef LLDB_HAS_FREEBSD_WATCHPOINT , m_read_dbreg(false) #endif { - GetRegisterInfo().ConfigureVectorRegisterInfos( - RegisterInfoPOSIX_arm64::eVectorQuadwordAArch64); ::memset(&m_hwp_regs, 0, sizeof(m_hwp_regs)); ::memset(&m_hbp_regs, 0, sizeof(m_hbp_regs)); } @@ -79,8 +77,6 @@ return NativeProcessFreeBSD::PtraceWrapper( PT_GETFPREGS, m_thread.GetID(), m_reg_data.data() + sizeof(RegisterInfoPOSIX_arm64::GPR)); - case RegisterInfoPOSIX_arm64::SVERegSet: - return Status("not supported"); } llvm_unreachable("NativeRegisterContextFreeBSD_arm64::ReadRegisterSet"); } @@ -94,8 +90,6 @@ return NativeProcessFreeBSD::PtraceWrapper( PT_SETFPREGS, m_thread.GetID(), m_reg_data.data() + sizeof(RegisterInfoPOSIX_arm64::GPR)); - case RegisterInfoPOSIX_arm64::SVERegSet: - return Status("not supported"); } llvm_unreachable("NativeRegisterContextFreeBSD_arm64::WriteRegisterSet"); } diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1461,9 +1461,6 @@ /// Return true if any backedge taken count expressions refer to the given /// subexpression. 
bool hasOperand(const SCEV *S, ScalarEvolution *SE) const; - - /// Invalidate this result and free associated memory. - void clear(); }; /// Cache the backedge-taken count of the loops for this function as they diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h --- a/llvm/include/llvm/IR/DebugInfoMetadata.h +++ b/llvm/include/llvm/IR/DebugInfoMetadata.h @@ -2594,16 +2594,6 @@ // return it's sign information. llvm::Optional isConstant() const; - /// Return the number of unique location operands referred to (via - /// DW_OP_LLVM_arg) in this expression; this is not necessarily the number of - /// instances of DW_OP_LLVM_arg within the expression. - /// For example, for the expression: - /// (DW_OP_LLVM_arg 0, DW_OP_LLVM_arg 1, DW_OP_plus, - /// DW_OP_LLVM_arg 0, DW_OP_mul) - /// This function would return 2, as there are two unique location operands - /// (0 and 1). - uint64_t getNumLocationOperands() const; - using element_iterator = ArrayRef::iterator; element_iterator elements_begin() const { return getElements().begin(); } @@ -2751,10 +2741,6 @@ /// return true with an offset of zero. bool extractIfOffset(int64_t &Offset) const; - /// Returns true iff this DIExpression contains at least one instance of - /// `DW_OP_LLVM_arg, n` for all n in [0, N). - bool hasAllLocationOps(unsigned N) const; - /// Checks if the last 4 elements of the expression are DW_OP_constu DW_OP_swap DW_OP_xderef and extracts the . diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -1122,9 +1122,7 @@ /// must be at least as wide as the IntPtr type for the address space of /// the base GEP pointer. 
bool accumulateConstantOffset(const DataLayout &DL, APInt &Offset) const; - bool collectOffset(const DataLayout &DL, unsigned BitWidth, - SmallDenseMap &VariableOffsets, - APInt &ConstantOffset) const; + // Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const Instruction *I) { return (I->getOpcode() == Instruction::GetElementPtr); diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -204,11 +204,6 @@ void replaceVariableLocationOp(Value *OldValue, Value *NewValue); void replaceVariableLocationOp(unsigned OpIdx, Value *NewValue); - /// Adding a new location operand will always result in this intrinsic using - /// an ArgList, and must always be accompanied by a new expression that uses - /// the new operand. - void addVariableLocationOps(ArrayRef NewValues, - DIExpression *NewExpr); void setVariable(DILocalVariable *NewVar) { setArgOperand(1, MetadataAsValue::get(NewVar->getContext(), NewVar)); diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h --- a/llvm/include/llvm/IR/Operator.h +++ b/llvm/include/llvm/IR/Operator.h @@ -576,12 +576,6 @@ Type *SourceType, ArrayRef Index, const DataLayout &DL, APInt &Offset, function_ref ExternalAnalysis = nullptr); - - /// Collect the offset of this GEP as a map of Values to their associated - /// APInt multipliers, as well as a total Constant Offset. - bool collectOffset(const DataLayout &DL, unsigned BitWidth, - SmallDenseMap &VariableOffsets, - APInt &ConstantOffset) const; }; class PtrToIntOperator diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h --- a/llvm/include/llvm/Transforms/Utils/Local.h +++ b/llvm/include/llvm/Transforms/Utils/Local.h @@ -298,8 +298,7 @@ /// appended to the expression. 
\p LocNo: the index of the location operand to /// which \p I applies, should be 0 for debug info without a DIArgList. DIExpression *salvageDebugInfoImpl(Instruction &I, DIExpression *DIExpr, - bool StackVal, unsigned LocNo, - SmallVectorImpl &AdditionalValues); + bool StackVal, unsigned LocNo); /// Point debug users of \p From to \p To or salvage them. Use this function /// only when replacing all uses of \p From with \p To, with a guarantee that diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp --- a/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/llvm/lib/Analysis/LazyValueInfo.cpp @@ -1103,17 +1103,27 @@ if (matchICmpOperand(Offset, RHS, Val, SwappedPred)) return getValueFromSimpleICmpCondition(SwappedPred, LHS, Offset); - // If (Val & Mask) == C then all the masked bits are known and we can compute - // a value range based on that. const APInt *Mask, *C; - if (EdgePred == ICmpInst::ICMP_EQ && - match(LHS, m_And(m_Specific(Val), m_APInt(Mask))) && + if (match(LHS, m_And(m_Specific(Val), m_APInt(Mask))) && match(RHS, m_APInt(C))) { - KnownBits Known; - Known.Zero = ~*C & *Mask; - Known.One = *C & *Mask; - return ValueLatticeElement::getRange( - ConstantRange::fromKnownBits(Known, /*IsSigned*/ false)); + // If (Val & Mask) == C then all the masked bits are known and we can + // compute a value range based on that. + if (EdgePred == ICmpInst::ICMP_EQ) { + KnownBits Known; + Known.Zero = ~*C & *Mask; + Known.One = *C & *Mask; + return ValueLatticeElement::getRange( + ConstantRange::fromKnownBits(Known, /*IsSigned*/ false)); + } + // If (Val & Mask) != 0 then the value must be larger than the lowest set + // bit of Mask. 
+ if (EdgePred == ICmpInst::ICMP_NE && !Mask->isNullValue() && + C->isNullValue()) { + unsigned BitWidth = Ty->getIntegerBitWidth(); + return ValueLatticeElement::getRange(ConstantRange::getNonEmpty( + APInt::getOneBitSet(BitWidth, Mask->countTrailingZeros()), + APInt::getNullValue(BitWidth))); + } } return ValueLatticeElement::getOverdefined(); diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -7207,16 +7207,6 @@ } void ScalarEvolution::forgetLoop(const Loop *L) { - // Drop any stored trip count value. - auto RemoveLoopFromBackedgeMap = - [](DenseMap &Map, const Loop *L) { - auto BTCPos = Map.find(L); - if (BTCPos != Map.end()) { - BTCPos->second.clear(); - Map.erase(BTCPos); - } - }; - SmallVector LoopWorklist(1, L); SmallVector Worklist; SmallPtrSet Visited; @@ -7225,8 +7215,9 @@ while (!LoopWorklist.empty()) { auto *CurrL = LoopWorklist.pop_back_val(); - RemoveLoopFromBackedgeMap(BackedgeTakenCounts, CurrL); - RemoveLoopFromBackedgeMap(PredicatedBackedgeTakenCounts, CurrL); + // Drop any stored trip count value. + BackedgeTakenCounts.erase(CurrL); + PredicatedBackedgeTakenCounts.erase(CurrL); // Drop information about predicated SCEV rewrites for this loop. for (auto I = PredicatedSCEVRewrites.begin(); @@ -7484,11 +7475,6 @@ "No point in having a non-constant max backedge taken count!"); } -/// Invalidate this result and free the ExitNotTakenInfo array. -void ScalarEvolution::BackedgeTakenInfo::clear() { - ExitNotTaken.clear(); -} - /// Compute the number of times the backedge of the specified loop will execute. ScalarEvolution::BackedgeTakenInfo ScalarEvolution::computeBackedgeTakenCount(const Loop *L, @@ -12227,13 +12213,8 @@ ExprValueMap.clear(); ValueExprMap.clear(); HasRecMap.clear(); - - // Free any extra memory created for ExitNotTakenInfo in the unlikely event - // that a loop had multiple computable exits. 
- for (auto &BTCI : BackedgeTakenCounts) - BTCI.second.clear(); - for (auto &BTCI : PredicatedBackedgeTakenCounts) - BTCI.second.clear(); + BackedgeTakenCounts.clear(); + PredicatedBackedgeTakenCounts.clear(); assert(PendingLoopPredicates.empty() && "isImpliedCond garbage"); assert(PendingPhiRanges.empty() && "getRangeRef garbage"); @@ -12648,10 +12629,9 @@ [S, this](DenseMap &Map) { for (auto I = Map.begin(), E = Map.end(); I != E;) { BackedgeTakenInfo &BEInfo = I->second; - if (BEInfo.hasOperand(S, this)) { - BEInfo.clear(); + if (BEInfo.hasOperand(S, this)) Map.erase(I++); - } else + else ++I; } }; diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h --- a/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h +++ b/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h @@ -121,6 +121,11 @@ // Currently, DBG_VALUE_VAR expressions must use stack_value. assert(Expr && Expr->isValid() && is_contained(Locs, dwarf::DW_OP_stack_value)); + for (DbgValueLocEntry &Entry : ValueLocEntries) { + assert(!Entry.isConstantFP() && !Entry.isConstantInt() && + "Constant values should only be present in non-variadic " + "DBG_VALUEs."); + } #endif } @@ -137,6 +142,11 @@ // Currently, DBG_VALUE_VAR expressions must use stack_value. 
assert(Expr && Expr->isValid() && is_contained(Expr->getElements(), dwarf::DW_OP_stack_value)); + for (DbgValueLocEntry &Entry : ValueLocEntries) { + assert(!Entry.isConstantFP() && !Entry.isConstantInt() && + "Constant values should only be present in non-variadic " + "DBG_VALUEs."); + } } #endif } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1238,10 +1238,6 @@ } void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) { - // TODO: For the variadic implementation, instead of only checking the fail - // state of `handleDebugValue`, we need know specifically which values were - // invalid, so that we attempt to salvage only those values when processing - // a DIArgList. assert(!DDI.getDI()->hasArgList() && "Not implemented for variadic dbg_values"); Value *V = DDI.getDI()->getValue(0); @@ -1265,21 +1261,16 @@ while (isa(V)) { Instruction &VAsInst = *cast(V); // Temporary "0", awaiting real implementation. - SmallVector AdditionalValues; - DIExpression *SalvagedExpr = - salvageDebugInfoImpl(VAsInst, Expr, StackValue, 0, AdditionalValues); + DIExpression *NewExpr = salvageDebugInfoImpl(VAsInst, Expr, StackValue, 0); // If we cannot salvage any further, and haven't yet found a suitable debug // expression, bail out. - // TODO: If AdditionalValues isn't empty, then the salvage can only be - // represented with a DBG_VALUE_LIST, so we give up. When we have support - // here for variadic dbg_values, remove that condition. - if (!SalvagedExpr || !AdditionalValues.empty()) + if (!NewExpr) break; // New value and expr now represent this debuginfo. V = VAsInst.getOperand(0); - Expr = SalvagedExpr; + Expr = NewExpr; // Some kind of simplification occurred: check whether the operand of the // salvaged debug expression can be encoded in this DAG. 
diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp --- a/llvm/lib/IR/DebugInfoMetadata.cpp +++ b/llvm/lib/IR/DebugInfoMetadata.cpp @@ -1244,17 +1244,6 @@ return false; } -bool DIExpression::hasAllLocationOps(unsigned N) const { - SmallDenseSet SeenOps; - for (auto ExprOp : expr_ops()) - if (ExprOp.getOp() == dwarf::DW_OP_LLVM_arg) - SeenOps.insert(ExprOp.getArg(0)); - for (uint64_t Idx = 0; Idx < N; ++Idx) - if (!is_contained(SeenOps, Idx)) - return false; - return true; -} - const DIExpression *DIExpression::extractAddressClass(const DIExpression *Expr, unsigned &AddrClass) { // FIXME: This seems fragile. Nothing that verifies that these elements @@ -1469,16 +1458,6 @@ return DIExpression::get(Expr->getContext(), Ops); } -uint64_t DIExpression::getNumLocationOperands() const { - uint64_t Result = 0; - for (auto ExprOp : expr_ops()) - if (ExprOp.getOp() == dwarf::DW_OP_LLVM_arg) - Result = std::max(Result, ExprOp.getArg(0) + 1); - assert(hasAllLocationOps(Result) && - "Expression is missing one or more location operands."); - return Result; -} - llvm::Optional DIExpression::isConstant() const { diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -1806,15 +1806,6 @@ return cast(this)->accumulateConstantOffset(DL, Offset); } -bool GetElementPtrInst::collectOffset( - const DataLayout &DL, unsigned BitWidth, - SmallDenseMap &VariableOffsets, - APInt &ConstantOffset) const { - // Delegate to the generic GEPOperator implementation. 
- return cast(this)->collectOffset(DL, BitWidth, VariableOffsets, - ConstantOffset); -} - //===----------------------------------------------------------------------===// // ExtractElementInst Implementation //===----------------------------------------------------------------------===// diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp --- a/llvm/lib/IR/IntrinsicInst.cpp +++ b/llvm/lib/IR/IntrinsicInst.cpp @@ -118,23 +118,6 @@ 0, MetadataAsValue::get(getContext(), DIArgList::get(getContext(), MDs))); } -void DbgVariableIntrinsic::addVariableLocationOps(ArrayRef NewValues, - DIExpression *NewExpr) { - assert(NewExpr->hasAllLocationOps(getNumVariableLocationOps() + - NewValues.size()) && - "NewExpr for debug variable intrinsic does not reference every " - "location operand."); - assert(!is_contained(NewValues, nullptr) && "New values must be non-null"); - setArgOperand(2, MetadataAsValue::get(getContext(), NewExpr)); - SmallVector MDs; - for (auto *VMD : location_ops()) - MDs.push_back(getAsMetadata(VMD)); - for (auto *VMD : NewValues) - MDs.push_back(getAsMetadata(VMD)); - setArgOperand( - 0, MetadataAsValue::get(getContext(), DIArgList::get(getContext(), MDs))); -} - Optional DbgVariableIntrinsic::getFragmentSizeInBits() const { if (auto Fragment = getExpression()->getFragmentInfo()) return Fragment->SizeInBits; diff --git a/llvm/lib/IR/Operator.cpp b/llvm/lib/IR/Operator.cpp --- a/llvm/lib/IR/Operator.cpp +++ b/llvm/lib/IR/Operator.cpp @@ -142,61 +142,4 @@ } return true; } - -bool GEPOperator::collectOffset( - const DataLayout &DL, unsigned BitWidth, - SmallDenseMap &VariableOffsets, - APInt &ConstantOffset) const { - assert(BitWidth == DL.getIndexSizeInBits(getPointerAddressSpace()) && - "The offset bit width does not match DL specification."); - - auto CollectConstantOffset = [&](APInt Index, uint64_t Size) { - Index = Index.sextOrTrunc(BitWidth); - APInt IndexedSize = APInt(BitWidth, Size); - ConstantOffset += Index * IndexedSize; - }; 
- - for (gep_type_iterator GTI = gep_type_begin(this), GTE = gep_type_end(this); - GTI != GTE; ++GTI) { - // Scalable vectors are multiplied by a runtime constant. - bool ScalableType = isa(GTI.getIndexedType()); - - Value *V = GTI.getOperand(); - StructType *STy = GTI.getStructTypeOrNull(); - // Handle ConstantInt if possible. - if (auto ConstOffset = dyn_cast(V)) { - if (ConstOffset->isZero()) - continue; - // If the type is scalable and the constant is not zero (vscale * n * 0 = - // 0) bailout. - // TODO: If the runtime value is accessible at any point before DWARF - // emission, then we could potentially keep a forward reference to it - // in the debug value to be filled in later. - if (ScalableType) - return false; - // Handle a struct index, which adds its field offset to the pointer. - if (STy) { - unsigned ElementIdx = ConstOffset->getZExtValue(); - const StructLayout *SL = DL.getStructLayout(STy); - // Element offset is in bytes. - CollectConstantOffset(APInt(BitWidth, SL->getElementOffset(ElementIdx)), - 1); - continue; - } - CollectConstantOffset(ConstOffset->getValue(), - DL.getTypeAllocSize(GTI.getIndexedType())); - continue; - } - - if (STy || ScalableType) - return false; - // Insert an initial offset of 0 for V iff none exists already, then - // increment the offset by IndexedSize. - VariableOffsets.try_emplace(V, BitWidth, 0); - APInt IndexedSize = - APInt(BitWidth, DL.getTypeAllocSize(GTI.getIndexedType())); - VariableOffsets[V] += IndexedSize; - } - return true; -} } // namespace llvm diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -16669,25 +16669,36 @@ unsigned Size = AI->getType()->getPrimitiveSizeInBits(); if (Size > 128) return AtomicExpansionKind::None; - // Nand not supported in LSE. 
- if (AI->getOperation() == AtomicRMWInst::Nand) return AtomicExpansionKind::LLSC; - // Leave 128 bits to LLSC. - if (Subtarget->hasLSE() && Size < 128) - return AtomicExpansionKind::None; - if (Subtarget->outlineAtomics() && Size < 128) { - // [U]Min/[U]Max RWM atomics are used in __sync_fetch_ libcalls so far. - // Don't outline them unless - // (1) high level support approved: - // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf - // (2) low level libgcc and compiler-rt support implemented by: - // min/max outline atomics helpers - if (AI->getOperation() != AtomicRMWInst::Min && - AI->getOperation() != AtomicRMWInst::Max && - AI->getOperation() != AtomicRMWInst::UMin && - AI->getOperation() != AtomicRMWInst::UMax) { + + // Nand is not supported in LSE. + // Leave 128 bits to LLSC or CmpXChg. + if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) { + if (Subtarget->hasLSE()) return AtomicExpansionKind::None; + if (Subtarget->outlineAtomics()) { + // [U]Min/[U]Max RMW atomics are used in __sync_fetch_ libcalls so far. + // Don't outline them unless + // (1) high level support approved: + // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf + // (2) low level libgcc and compiler-rt support implemented by: + // min/max outline atomics helpers + if (AI->getOperation() != AtomicRMWInst::Min && + AI->getOperation() != AtomicRMWInst::Max && + AI->getOperation() != AtomicRMWInst::UMin && + AI->getOperation() != AtomicRMWInst::UMax) { + return AtomicExpansionKind::None; + } } } + + // At -O0, fast-regalloc cannot cope with the live vregs necessary to + // implement atomicrmw without spilling. If the target address is also on the + // stack and close enough to the spill slot, this can lead to a situation + // where the monitor always gets cleared and the atomic operation can never + // succeed. So at -O0 lower this operation to a CAS loop.
+ if (getTargetMachine().getOptLevel() == CodeGenOpt::None) + return AtomicExpansionKind::CmpXChg; + return AtomicExpansionKind::LLSC; } diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -555,6 +555,7 @@ include "X86ScheduleSLM.td" include "X86ScheduleZnver1.td" include "X86ScheduleZnver2.td" +include "X86ScheduleZnver3.td" include "X86ScheduleBdVer2.td" include "X86ScheduleBtVer2.td" include "X86SchedSkylakeClient.td" @@ -1382,7 +1383,7 @@ ProcessorFeatures.ZNTuning>; def : ProcModel<"znver2", Znver2Model, ProcessorFeatures.ZN2Features, ProcessorFeatures.ZN2Tuning>; -def : ProcModel<"znver3", Znver2Model, ProcessorFeatures.ZN3Features, +def : ProcModel<"znver3", Znver3Model, ProcessorFeatures.ZN3Features, ProcessorFeatures.ZN3Tuning>; def : Proc<"geode", [FeatureX87, FeatureCMPXCHG8B, Feature3DNowA], diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp --- a/llvm/lib/Target/X86/X86AsmPrinter.cpp +++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp @@ -643,7 +643,8 @@ OutStreamer->SwitchSection(Nt); // Emitting note header. - int WordSize = TT.isArch64Bit() ? 8 : 4; + const int WordSize = + TT.isArch64Bit() && TT.getEnvironment() != Triple::GNUX32 ? 8 : 4; emitAlignment(WordSize == 4 ? 
Align(4) : Align(8)); OutStreamer->emitIntValue(4, 4 /*size*/); // data size for "GNU\0" OutStreamer->emitIntValue(8 + WordSize, 4 /*size*/); // Elf_Prop size diff --git a/llvm/lib/Target/X86/X86PfmCounters.td b/llvm/lib/Target/X86/X86PfmCounters.td --- a/llvm/lib/Target/X86/X86PfmCounters.td +++ b/llvm/lib/Target/X86/X86PfmCounters.td @@ -233,3 +233,16 @@ ]; } def : PfmCountersBinding<"znver2", ZnVer2PfmCounters>; + +def ZnVer3PfmCounters : ProcPfmCounters { + let CycleCounter = PfmCounter<"cycles_not_in_halt">; + let UopsCounter = PfmCounter<"retired_ops">; + let IssueCounters = [ + PfmIssueCounter<"Zn3Int", "ops_type_dispatched_from_decoder:int_disp_retire_mode">, + PfmIssueCounter<"Zn3FPU", "ops_type_dispatched_from_decoder:fp_disp_retire_mode">, + PfmIssueCounter<"Zn3Load", "ls_dispatch:ld_dispatch">, + PfmIssueCounter<"Zn3Store", "ls_dispatch:store_dispatch">, + PfmIssueCounter<"Zn3Divider", "div_op_count"> + ]; +} +def : PfmCountersBinding<"znver3", ZnVer3PfmCounters>; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver3.td b/llvm/lib/Target/X86/X86ScheduleZnver3.td new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/X86/X86ScheduleZnver3.td @@ -0,0 +1,1455 @@ +//=- X86ScheduleZnver3.td - X86 Znver3 Scheduling ------------*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for Znver3 to support instruction +// scheduling and other instruction cost heuristics. +// Based on: +// * AMD Software Optimization Guide for AMD Family 19h Processors. 
+// https://www.amd.com/system/files/TechDocs/56665.zip +// * The microarchitecture of Intel, AMD and VIA CPUs, By Agner Fog +// http://www.agner.org/optimize/microarchitecture.pdf +// * AMD Zen 3 Ryzen Deep Dive Review +// https://www.anandtech.com/show/16214/ +//===----------------------------------------------------------------------===// + +def Znver3Model : SchedMachineModel { + // AMD SOG 19h, 2.9.6 Dispatch + // The processor may dispatch up to 6 macro ops per cycle + // into the execution engine. + let IssueWidth = 6; + // AMD SOG 19h, 2.10.3 + // The retire control unit (RCU) tracks the completion status of all + // outstanding operations (integer, load/store, and floating-point) and is + // the final arbiter for exception processing and recovery. + // The unit can receive up to 6 macro ops dispatched per cycle and track up + // to 256 macro ops in-flight in non-SMT mode or 128 per thread in SMT mode. + let MicroOpBufferSize = 256; + // AMD SOG 19h, 2.9.1 Op Cache + // The op cache is organized as an associative cache with 64 sets and 8 ways. + // At each set-way intersection is an entry containing up to 8 macro ops. + // The maximum capacity of the op cache is 4K ops. + // Agner, 22.5 µop cache + // The size of the µop cache is big enough for holding most critical loops. + let LoopMicroOpBufferSize = 4096; + // AMD SOG 19h, 2.6.2 L1 Data Cache + // The L1 data cache has a 4- or 5- cycle integer load-to-use latency. + // AMD SOG 19h, 2.12 L1 Data Cache + // The AGU and LS pipelines are optimized for simple address generation modes. + // <...> and can achieve 4-cycle load-to-use integer load latency. + let LoadLatency = 4; + // AMD SOG 19h, 2.12 L1 Data Cache + // The AGU and LS pipelines are optimized for simple address generation modes. + // <...> and can achieve <...> 7-cycle load-to-use FP load latency. + int VecLoadLatency = 7; + // Latency of a simple store operation. 
+ int StoreLatency = 1; + // FIXME + let HighLatency = 25; // FIXME: any better choice? + // AMD SOG 19h, 2.8 Optimizing Branching + // The branch misprediction penalty is in the range from 11 to 18 cycles, + // <...>. The common case penalty is 13 cycles. + let MispredictPenalty = 13; + + let PostRAScheduler = 1; // Enable Post RegAlloc Scheduler pass. + + // FIXME: This variable is required for incomplete model. + // We haven't catered for all instructions. + // So, we reset the value of this variable so as to + // say that the model is incomplete. + let CompleteModel = 0; +} + +let SchedModel = Znver3Model in { + + +//===----------------------------------------------------------------------===// +// RCU +//===----------------------------------------------------------------------===// + +// AMD SOG 19h, 2.10.3 Retire Control Unit +// The unit can receive up to 6 macro ops dispatched per cycle and track up to +// 256 macro ops in-flight in non-SMT mode or 128 per thread in SMT mode. <...> +// The retire unit handles in-order commit of up to eight macro ops per cycle. +def Zn3RCU : RetireControlUnit; + +//===----------------------------------------------------------------------===// +// Units +//===----------------------------------------------------------------------===// + +// There are a total of three Units, each one with its own schedulers. + +//===----------------------------------------------------------------------===// +// Integer Execution Unit +// + +// AMD SOG 19h, 2.4 Superscalar Organization +// The processor uses four decoupled independent integer scheduler queues, +// each one servicing one ALU pipeline and one or two other pipelines + +// +// Execution pipes +//===----------------------------------------------------------------------===// + +// AMD SOG 19h, 2.10.2 Execution Units +// The processor contains 4 general purpose integer execution pipes. +// Each pipe has an ALU capable of general purpose integer operations.
+def Zn3ALU0 : ProcResource<1>; +def Zn3ALU1 : ProcResource<1>; +def Zn3ALU2 : ProcResource<1>; +def Zn3ALU3 : ProcResource<1>; + +// AMD SOG 19h, 2.10.2 Execution Units +// There is also a separate branch execution unit. +def Zn3BRU1 : ProcResource<1>; + +// AMD SOG 19h, 2.10.2 Execution Units +// There are three Address Generation Units (AGUs) for all load and store +// address generation. There are also 3 store data movement units +// associated with the same schedulers as the AGUs. +def Zn3AGU0 : ProcResource<1>; +def Zn3AGU1 : ProcResource<1>; +def Zn3AGU2 : ProcResource<1>; + +// +// Execution Units +//===----------------------------------------------------------------------===// + +// AMD SOG 19h, 2.10.2 Execution Units +// ALU0 additionally has divide <...> execution capability. +defvar Zn3Divider = Zn3ALU0; + +// AMD SOG 19h, 2.10.2 Execution Units +// ALU0 additionally has <...> branch execution capability. +defvar Zn3BRU0 = Zn3ALU0; + +// Integer Multiplication issued on ALU1. +defvar Zn3Multiplier = Zn3ALU1; + +// Execution pipeline grouping +//===----------------------------------------------------------------------===// + +// General ALU operations +def Zn3ALU0123 : ProcResGroup<[Zn3ALU0, Zn3ALU1, Zn3ALU2, Zn3ALU3]>; + +// General AGU operations +def Zn3AGU012 : ProcResGroup<[Zn3AGU0, Zn3AGU1, Zn3AGU2]>; + +// Control flow: jumps, calls +def Zn3BRU01 : ProcResGroup<[Zn3BRU0, Zn3BRU1]>; + +// Everything that isn't control flow, but still needs to access CC register, +// namely: conditional moves, SETcc. +def Zn3ALU03 : ProcResGroup<[Zn3ALU0, Zn3ALU3]>; + +// Zn3ALU1 handles complex bit twiddling: CRC/PDEP/PEXT + +// Simple bit twiddling: bit test, shift/rotate, bit extraction +def Zn3ALU12 : ProcResGroup<[Zn3ALU1, Zn3ALU2]>; + + +// +// Scheduling +//===----------------------------------------------------------------------===// + +// AMD SOG 19h, 2.10.3 Retire Control Unit +// The integer physical register file (PRF) consists of 192 registers. 
+def Zn3IntegerPRF : RegisterFile<192, [GR64, CCR]>; + +// anandtech, The integer scheduler has a 4*24 entry macro op capacity. +// AMD SOG 19h, 2.10.1 Schedulers +// The schedulers can receive up to six macro ops per cycle, with a limit of +// two per scheduler. Each scheduler can issue one micro op per cycle into +// each of its associated pipelines +// FIXME: these are 4 separate schedulers, not a single big one. +def Zn3Int : ProcResGroup<[Zn3ALU0, Zn3AGU0, Zn3BRU0, // scheduler 0 + Zn3ALU1, Zn3AGU1, // scheduler 1 + Zn3ALU2, Zn3AGU2, // scheduler 2 + Zn3ALU3, Zn3BRU1 // scheduler 3 + ]> { + let BufferSize = !mul(4, 24); +} + + +//===----------------------------------------------------------------------===// +// Floating-Point Unit +// + +// AMD SOG 19h, 2.4 Superscalar Organization +// The processor uses <...> two decoupled independent floating point schedulers +// each servicing two FP pipelines and one store or FP-to-integer pipeline. + +// +// Execution pipes +//===----------------------------------------------------------------------===// + +// AMD SOG 19h, 2.10.1 Schedulers +// <...>, and six FPU pipes. 
+// Agner, 22.10 Floating point execution pipes +// There are six floating point/vector execution pipes, +def Zn3FPP0 : ProcResource<1>; +def Zn3FPP1 : ProcResource<1>; +def Zn3FPP2 : ProcResource<1>; +def Zn3FPP3 : ProcResource<1>; +def Zn3FPP45 : ProcResource<2>; + +// +// Execution Units +//===----------------------------------------------------------------------===// +// AMD SOG 19h, 2.11.1 Floating Point Execution Resources + +// (v)FMUL*, (v)FMA*, Floating Point Compares, Blendv(DQ) +defvar Zn3FPFMul0 = Zn3FPP0; +defvar Zn3FPFMul1 = Zn3FPP1; + +// (v)FADD* +defvar Zn3FPFAdd0 = Zn3FPP2; +defvar Zn3FPFAdd1 = Zn3FPP3; + +// All convert operations except pack/unpack +defvar Zn3FPFCvt0 = Zn3FPP2; +defvar Zn3FPFCvt1 = Zn3FPP3; + +// All Divide and Square Root except Reciprocal Approximation +// AMD SOG 19h, 2.11.1 Floating Point Execution Resources +// FDIV unit can support 2 simultaneous operations in flight +// even though it occupies a single pipe. +// FIXME: BufferSize=2 ? +defvar Zn3FPFDiv = Zn3FPP1; + +// Moves and Logical operations on Floating Point Data Types +defvar Zn3FPFMisc0 = Zn3FPP0; +defvar Zn3FPFMisc1 = Zn3FPP1; +defvar Zn3FPFMisc2 = Zn3FPP2; +defvar Zn3FPFMisc3 = Zn3FPP3; + +// Integer Adds, Subtracts, and Compares +// Some complex VADD operations are not available in all pipes. +defvar Zn3FPVAdd0 = Zn3FPP0; +defvar Zn3FPVAdd1 = Zn3FPP1; +defvar Zn3FPVAdd2 = Zn3FPP2; +defvar Zn3FPVAdd3 = Zn3FPP3; + +// Integer Multiplies, SAD, Blendvb +defvar Zn3FPVMul0 = Zn3FPP0; +defvar Zn3FPVMul1 = Zn3FPP3; + +// Data Shuffles, Packs, Unpacks, Permute +// Some complex shuffle operations are only available in pipe1. 
+defvar Zn3FPVShuf = Zn3FPP1; +defvar Zn3FPVShufAux = Zn3FPP2; + +// Bit Shift Left/Right operations +defvar Zn3FPVShift0 = Zn3FPP1; +defvar Zn3FPVShift1 = Zn3FPP2; + +// Moves and Logical operations on Packed Integer Data Types +defvar Zn3FPVMisc0 = Zn3FPP0; +defvar Zn3FPVMisc1 = Zn3FPP1; +defvar Zn3FPVMisc2 = Zn3FPP2; +defvar Zn3FPVMisc3 = Zn3FPP3; + +// *AES* +defvar Zn3FPAES0 = Zn3FPP0; +defvar Zn3FPAES1 = Zn3FPP1; + +// *CLM* +defvar Zn3FPCLM0 = Zn3FPP0; +defvar Zn3FPCLM1 = Zn3FPP1; + +// Execution pipeline grouping +//===----------------------------------------------------------------------===// + +// AMD SOG 19h, 2.11 Floating-Point Unit +// Stores and floating point to general purpose register transfer +// have 2 dedicated pipelines (pipe 5 and 6). +def Zn3FPU0123 : ProcResGroup<[Zn3FPP0, Zn3FPP1, Zn3FPP2, Zn3FPP3]>; + +// (v)FMUL*, (v)FMA*, Floating Point Compares, Blendv(DQ) +def Zn3FPFMul01 : ProcResGroup<[Zn3FPFMul0, Zn3FPFMul1]>; + +// (v)FADD* +// Some complex VADD operations are not available in all pipes. +def Zn3FPFAdd01 : ProcResGroup<[Zn3FPFAdd0, Zn3FPFAdd1]>; + +// All convert operations except pack/unpack +def Zn3FPFCvt01 : ProcResGroup<[Zn3FPFCvt0, Zn3FPFCvt1]>; + +// All Divide and Square Root except Reciprocal Approximation +// def Zn3FPFDiv : ProcResGroup<[Zn3FPFDiv]>; + +// Moves and Logical operations on Floating Point Data Types +def Zn3FPFMisc0123 : ProcResGroup<[Zn3FPFMisc0, Zn3FPFMisc1, Zn3FPFMisc2, Zn3FPFMisc3]>; + +def Zn3FPFMisc12 : ProcResGroup<[Zn3FPFMisc1, Zn3FPFMisc2]>; + +// Loads, Stores and Move to General Register (EX) Operations +// AMD SOG 19h, 2.11 Floating-Point Unit +// Stores and floating point to general purpose register transfer +// have 2 dedicated pipelines (pipe 5 and 6). +defvar Zn3FPLd01 = Zn3FPP45; + +// AMD SOG 19h, 2.11 Floating-Point Unit +// Note that FP stores are supported on two pipelines, +// but throughput is limited to one per cycle. 
+let Super = Zn3FPP45 in +def Zn3FPSt : ProcResource<1>; + +// Integer Adds, Subtracts, and Compares +// Some complex VADD operations are not available in all pipes. +def Zn3FPVAdd0123 : ProcResGroup<[Zn3FPVAdd0, Zn3FPVAdd1, Zn3FPVAdd2, Zn3FPVAdd3]>; + +def Zn3FPVAdd01: ProcResGroup<[Zn3FPVAdd0, Zn3FPVAdd1]>; +def Zn3FPVAdd12: ProcResGroup<[Zn3FPVAdd1, Zn3FPVAdd2]>; + +// Integer Multiplies, SAD, Blendvb +def Zn3FPVMul01 : ProcResGroup<[Zn3FPVMul0, Zn3FPVMul1]>; + +// Data Shuffles, Packs, Unpacks, Permute +// Some complex shuffle operations are only available in pipe1. +def Zn3FPVShuf01 : ProcResGroup<[Zn3FPVShuf, Zn3FPVShufAux]>; + +// Bit Shift Left/Right operations +def Zn3FPVShift01 : ProcResGroup<[Zn3FPVShift0, Zn3FPVShift1]>; + +// Moves and Logical operations on Packed Integer Data Types +def Zn3FPVMisc0123 : ProcResGroup<[Zn3FPVMisc0, Zn3FPVMisc1, Zn3FPVMisc2, Zn3FPVMisc3]>; + +// *AES* +def Zn3FPAES01 : ProcResGroup<[Zn3FPAES0, Zn3FPAES1]>; + +// *CLM* +def Zn3FPCLM01 : ProcResGroup<[Zn3FPCLM0, Zn3FPCLM1]>; + + +// +// Scheduling +//===----------------------------------------------------------------------===// + +// Agner, 21.8 Register renaming and out-of-order schedulers +// The floating point register file has 160 vector registers +// of 128 bits each in Zen 1 and 256 bits each in Zen 2. +// anandtech also confirms this. +def Zn3FpPRF : RegisterFile<160, [VR64, VR128, VR256], [1, 1, 1]>; + +// AMD SOG 19h, 2.11 Floating-Point Unit +// The floating-point scheduler has a 2*32 entry macro op capacity. +// AMD SOG 19h, 2.11 Floating-Point Unit +// <...> the scheduler can issue 1 micro op per cycle for each pipe. +// FIXME: those are two separate schedulers, not a single big one. 
+def Zn3FP : ProcResGroup<[Zn3FPP0, Zn3FPP2, /*Zn3FPP4,*/ // scheduler 0 + Zn3FPP1, Zn3FPP3, Zn3FPP45 /*Zn3FPP5*/ // scheduler 1 + ]> { + let BufferSize = !mul(2, 32); +} + +// AMD SOG 19h, 2.11 Floating-Point Unit +// Macro ops can be dispatched to the 64 entry Non Scheduling Queue (NSQ) +// even if floating-point scheduler is full. +// FIXME: how to model this properly? + + +//===----------------------------------------------------------------------===// +// Load-Store Unit +// + +// AMD SOG 19h, 2.12 Load-Store Unit +// The LS unit contains three largely independent pipe-lines +// enabling the execution of three 256-bit memory operations per cycle. +def Zn3LSU : ProcResource<3>; + +// AMD SOG 19h, 2.12 Load-Store Unit +// All three memory operations can be loads. +let Super = Zn3LSU in +def Zn3Load : ProcResource<3> { + // AMD SOG 19h, 2.12 Load-Store Unit + // The LS unit can process up to 72 out-of-order loads. + let BufferSize = 72; +} + +def Zn3LoadQueue : LoadQueue; + +// AMD SOG 19h, 2.12 Load-Store Unit +// A maximum of two of the memory operations can be stores. +let Super = Zn3LSU in +def Zn3Store : ProcResource<2> { + // AMD SOG 19h, 2.12 Load-Store Unit + // The LS unit utilizes a 64-entry store queue (STQ). + let BufferSize = 64; +} + +def Zn3StoreQueue : StoreQueue; + +//===----------------------------------------------------------------------===// +// Basic helper classes. +//===----------------------------------------------------------------------===// + +// Many SchedWrites are defined in pairs with and without a folded load. +// Instructions with folded loads are usually micro-fused, so they only appear +// as two micro-ops when dispatched by the schedulers. +// This multiclass defines the resource usage for variants with and without +// folded loads. 
+ +multiclass __zn3WriteRes ExePorts, + int Lat = 1, list Res = [], int UOps = 1> { + def : WriteRes { + let Latency = Lat; + let ResourceCycles = Res; + let NumMicroOps = UOps; + } +} + +multiclass __zn3WriteResPair ExePorts, int Lat, + list Res, int UOps, int LoadLat, int LoadUOps, + ProcResourceKind AGU, int LoadRes> { + defm : __zn3WriteRes; + + defm : __zn3WriteRes; +} + +// For classes without folded loads. +multiclass Zn3WriteResInt ExePorts, int Lat = 1, + list Res = [], int UOps = 1> { + defm : __zn3WriteRes; +} + +multiclass Zn3WriteResXMM ExePorts, int Lat = 1, + list Res = [], int UOps = 1> { + defm : __zn3WriteRes; +} + +multiclass Zn3WriteResYMM ExePorts, int Lat = 1, + list Res = [], int UOps = 1> { + defm : __zn3WriteRes; +} + +// For classes with folded loads. +multiclass Zn3WriteResIntPair ExePorts, int Lat = 1, + list Res = [], int UOps = 1, + int LoadUOps = 0, int LoadRes = 1> { + defm : __zn3WriteResPair; +} + +multiclass Zn3WriteResXMMPair ExePorts, int Lat = 1, + list Res = [], int UOps = 1, + int LoadUOps = 0, int LoadRes = 1> { + defm : __zn3WriteResPair; +} + +multiclass Zn3WriteResYMMPair ExePorts, int Lat = 1, + list Res = [], int UOps = 1, + int LoadUOps = 0, int LoadRes = 1> { + defm : __zn3WriteResPair; +} + + +//===----------------------------------------------------------------------===// +// Here be dragons. +//===----------------------------------------------------------------------===// + +def : ReadAdvance; + +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +// AMD SOG 19h, 2.11 Floating-Point Unit +// There is 1 cycle of added latency for a result to cross +// from F to I or I to F domain. +def : ReadAdvance; + +// Instructions with both a load and a store folded are modeled as a folded +// load + WriteRMW. +defm : Zn3WriteResInt; + +// Loads, stores, and moves, not folded with other operations. 
+defm : Zn3WriteResInt; + +def Zn3WriteMOVSlow : SchedWriteRes<[Zn3AGU012, Zn3Load]> { + let Latency = !add(Znver3Model.LoadLatency, 1); + let ResourceCycles = [3, 1]; + let NumMicroOps = 1; +} +def : InstRW<[Zn3WriteMOVSlow], (instrs MOV8rm, MOV8rm_NOREX, MOV16rm, MOVSX16rm16, MOVSX16rm32, MOVZX16rm16, MOVSX16rm8, MOVZX16rm8)>; + +defm : Zn3WriteResInt; +defm : Zn3WriteResInt; +defm : Zn3WriteResInt; + +def Zn3WriteMoveRenameable : SchedWriteRes<[Zn3ALU0123]> { + let Latency = 0; + let ResourceCycles = [1]; + let NumMicroOps = 1; +} +def : InstRW<[Zn3WriteMoveRenameable], (instrs MOV32rr, MOV32rr_REV, + MOV64rr, MOV64rr_REV, + MOVSX32rr32)>; + +def Zn3WriteMOVBE16rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU0123]> { + let Latency = Znver3Model.LoadLatency; + let ResourceCycles = [1, 1, 4]; + let NumMicroOps = 1; +} +def : InstRW<[Zn3WriteMOVBE16rm], (instrs MOVBE16rm)>; + +def Zn3WriteMOVBEmr : SchedWriteRes<[Zn3ALU0123, Zn3AGU012, Zn3Store]> { + let Latency = Znver3Model.StoreLatency; + let ResourceCycles = [4, 1, 1]; + let NumMicroOps = 2; +} +def : InstRW<[Zn3WriteMOVBEmr], (instrs MOVBE16mr, MOVBE32mr, MOVBE64mr)>; + +// Arithmetic. +defm : Zn3WriteResIntPair; // Simple integer ALU op. 
+ +def Zn3WriteALUSlow : SchedWriteRes<[Zn3ALU0123]> { + let Latency = 1; + let ResourceCycles = [4]; + let NumMicroOps = 1; +} +def : InstRW<[Zn3WriteALUSlow], (instrs ADD8i8, ADD16i16, ADD32i32, ADD64i32, + AND8i8, AND16i16, AND32i32, AND64i32, + OR8i8, OR16i16, OR32i32, OR64i32, + SUB8i8, SUB16i16, SUB32i32, SUB64i32, + XOR8i8, XOR16i16, XOR32i32, XOR64i32)>; + +def Zn3WriteMoveExtend : SchedWriteRes<[Zn3ALU0123]> { + let Latency = 1; + let ResourceCycles = [4]; + let NumMicroOps = 1; +} +def : InstRW<[Zn3WriteMoveExtend], (instrs MOVSX16rr16, MOVSX16rr32, MOVZX16rr16, MOVSX16rr8, MOVZX16rr8)>; + +def Zn3WriteMaterialize32bitImm: SchedWriteRes<[Zn3ALU0123]> { + let Latency = 1; + let ResourceCycles = [2]; + let NumMicroOps = 1; +} +def : InstRW<[Zn3WriteMaterialize32bitImm], (instrs MOV32ri, MOV32ri_alt, MOV64ri32)>; + +def Zn3WritePDEP_PEXT : SchedWriteRes<[Zn3ALU1]> { + let Latency = 3; + let ResourceCycles = [1]; + let NumMicroOps = 1; +} +def : InstRW<[Zn3WritePDEP_PEXT], (instrs PDEP32rr, PDEP64rr, + PEXT32rr, PEXT64rr)>; + +defm : Zn3WriteResIntPair; // Integer ALU + flags op. + +def Zn3WriteADC8mr_SBB8mr : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU0123, Zn3Store]> { + let Latency = 1; + let ResourceCycles = [1, 1, 7, 1]; + let NumMicroOps = 1; +} +def : InstRW<[Zn3WriteADC8mr_SBB8mr], (instrs ADC8mr, SBB8mr)>; + +// This is for simple LEAs with one or two input operands. +defm : Zn3WriteResInt; // LEA instructions can't fold loads. + +// This write is used for slow LEA instructions. +def Zn3Write3OpsLEA : SchedWriteRes<[Zn3ALU0123]> { + let Latency = 2; + let ResourceCycles = [1]; + let NumMicroOps = 2; +} + +// In this (Znver3) model, a slow LEA is either a 3Ops LEA (base, index, offset), +// or an LEA with a `Scale` value different than 1. +def Zn3SlowLEAPredicate : MCSchedPredicate< + CheckAny<[ + // A 3-operand LEA (base, index, offset). + IsThreeOperandsLEAFn, + // An LEA with a "Scale" different than 1. 
+ CheckAll<[ + CheckIsImmOperand<2>, + CheckNot> + ]> + ]> +>; + +def Zn3WriteLEA : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; + +def : InstRW<[Zn3WriteLEA], (instrs LEA32r, LEA64r, LEA64_32r)>; + +def Zn3SlowLEA16r : SchedWriteRes<[Zn3ALU0123]> { + let Latency = 2; // FIXME: not from llvm-exegesis + let ResourceCycles = [4]; + let NumMicroOps = 2; +} + +def : InstRW<[Zn3SlowLEA16r], (instrs LEA16r)>; + +// Integer multiplication +defm : Zn3WriteResIntPair; // Integer 8-bit multiplication. +defm : Zn3WriteResIntPair; // Integer 16-bit multiplication. +defm : Zn3WriteResIntPair; // Integer 16-bit multiplication by immediate. +defm : Zn3WriteResIntPair; // Integer 16-bit multiplication by register. +defm : Zn3WriteResIntPair; // Integer 32-bit multiplication. + +def Zn3MULX32rr : SchedWriteRes<[Zn3Multiplier]> { + let Latency = 3; + let ResourceCycles = [1]; + let NumMicroOps = 2; +} +def : InstRW<[Zn3MULX32rr], (instrs MULX32rr)>; + +def Zn3MULX32rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3Multiplier]> { + let Latency = !add(Znver3Model.LoadLatency, Zn3MULX32rr.Latency); + let ResourceCycles = [1, 1, 2]; + let NumMicroOps = Zn3MULX32rr.NumMicroOps; +} +def : InstRW<[Zn3MULX32rm], (instrs MULX32rm)>; + +defm : Zn3WriteResIntPair; // Integer 32-bit multiplication by immediate. +defm : Zn3WriteResIntPair; // Integer 32-bit multiplication by register. +defm : Zn3WriteResIntPair; // Integer 64-bit multiplication. + +def Zn3MULX64rr : SchedWriteRes<[Zn3Multiplier]> { + let Latency = 4; + let ResourceCycles = [1]; + let NumMicroOps = 2; +} +def : InstRW<[Zn3MULX64rr], (instrs MULX64rr)>; + +def Zn3MULX64rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3Multiplier]> { + let Latency = !add(Znver3Model.LoadLatency, Zn3MULX64rr.Latency); + let ResourceCycles = [1, 1, 2]; + let NumMicroOps = Zn3MULX64rr.NumMicroOps; +} +def : InstRW<[Zn3MULX64rm], (instrs MULX64rm)>; + +defm : Zn3WriteResIntPair; // Integer 64-bit multiplication by immediate. 
+defm : Zn3WriteResIntPair; // Integer 64-bit multiplication by register. +defm : Zn3WriteResInt; // Integer multiplication, high part. + +defm : Zn3WriteResInt; // Byte Order (Endianness) 32-bit Swap. +defm : Zn3WriteResInt; // Byte Order (Endianness) 64-bit Swap. + +defm : Zn3WriteResIntPair; // Compare and set, compare and swap. + +def Zn3WriteCMPXCHG8rr : SchedWriteRes<[Zn3ALU0123]> { + let Latency = 3; + let ResourceCycles = [12]; + let NumMicroOps = 3; +} +def : InstRW<[Zn3WriteCMPXCHG8rr], (instrs CMPXCHG8rr)>; + +defm : Zn3WriteResInt; // Compare and set, compare and swap. + +def Zn3WriteCMPXCHG8rm_LCMPXCHG8 : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU0123]> { + let Latency = !add(Znver3Model.LoadLatency, Zn3WriteCMPXCHG8rr.Latency); + let ResourceCycles = [1, 1, 12]; + let NumMicroOps = !add(Zn3WriteCMPXCHG8rr.NumMicroOps, 2); +} +def : InstRW<[Zn3WriteCMPXCHG8rm_LCMPXCHG8], (instrs CMPXCHG8rm, LCMPXCHG8)>; + +def Zn3WriteCMPXCHG8B : SchedWriteRes<[Zn3ALU0123]> { + let Latency = 3; // FIXME: not from llvm-exegesis + let ResourceCycles = [24]; + let NumMicroOps = 19; +} +def : InstRW<[Zn3WriteCMPXCHG8B], (instrs CMPXCHG8B)>; + +def Zn3WriteCMPXCHG16B_LCMPXCHG16B : SchedWriteRes<[Zn3ALU0123]> { + let Latency = 4; // FIXME: not from llvm-exegesis + let ResourceCycles = [59]; + let NumMicroOps = 28; +} +def : InstRW<[Zn3WriteCMPXCHG16B_LCMPXCHG16B], (instrs CMPXCHG16B, LCMPXCHG16B)>; + +defm : Zn3WriteResInt; // Compare+Exchange - TODO RMW support. 
+ +def Zn3WriteWriteXCHGUnrenameable : SchedWriteRes<[Zn3ALU0123]> { + let Latency = 1; + let ResourceCycles = [2]; + let NumMicroOps = 2; +} +def : InstRW<[Zn3WriteWriteXCHGUnrenameable], (instrs XCHG8rr, XCHG16rr, XCHG16ar)>; + +def Zn3WriteXCHG8rm_XCHG16rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU0123]> { + let Latency = !add(Znver3Model.LoadLatency, 3); // FIXME: not from llvm-exegesis + let ResourceCycles = [1, 1, 2]; + let NumMicroOps = 5; +} +def : InstRW<[Zn3WriteXCHG8rm_XCHG16rm], (instrs XCHG8rm, XCHG16rm)>; + +def Zn3WriteXCHG32rm_XCHG64rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU0123]> { + let Latency = !add(Znver3Model.LoadLatency, 2); // FIXME: not from llvm-exegesis + let ResourceCycles = [1, 1, 2]; + let NumMicroOps = 2; +} +def : InstRW<[Zn3WriteXCHG32rm_XCHG64rm], (instrs XCHG32rm, XCHG64rm)>; + +// Integer division. +// FIXME: uops for 8-bit division measures as 2. for others it's a guess. +// FIXME: latency for 8-bit division measures as 10. for others it's a guess. +defm : Zn3WriteResIntPair; +defm : Zn3WriteResIntPair; +defm : Zn3WriteResIntPair; +defm : Zn3WriteResIntPair; +defm : Zn3WriteResIntPair; +defm : Zn3WriteResIntPair; +defm : Zn3WriteResIntPair; +defm : Zn3WriteResIntPair; + +defm : Zn3WriteResIntPair; // Bit scan forward. +defm : Zn3WriteResIntPair; // Bit scan reverse. + +defm : Zn3WriteResIntPair; // Bit population count. + +def Zn3WritePOPCNT16rr : SchedWriteRes<[Zn3ALU0123]> { + let Latency = 1; + let ResourceCycles = [4]; + let NumMicroOps = 1; +} +def : InstRW<[Zn3WritePOPCNT16rr], (instrs POPCNT16rr)>; + +defm : Zn3WriteResIntPair; // Leading zero count. + +def Zn3WriteLZCNT16rr : SchedWriteRes<[Zn3ALU0123]> { + let Latency = 1; + let ResourceCycles = [4]; + let NumMicroOps = 1; +} +def : InstRW<[Zn3WriteLZCNT16rr], (instrs LZCNT16rr)>; + +defm : Zn3WriteResIntPair; // Trailing zero count. 
+ +def Zn3WriteTZCNT16rr : SchedWriteRes<[Zn3ALU0123]> { + let Latency = 2; + let ResourceCycles = [4]; + let NumMicroOps = 2; +} +def : InstRW<[Zn3WriteTZCNT16rr], (instrs TZCNT16rr)>; + +defm : Zn3WriteResIntPair; // Conditional move. +defm : Zn3WriteResInt; // FIXME: not from llvm-exegesis // X87 conditional move. +defm : Zn3WriteResInt; // Set register based on condition code. +defm : Zn3WriteResInt; // FIXME: latency not from llvm-exegesis +defm : Zn3WriteResInt; // Load/Store flags in AH. + +defm : Zn3WriteResInt; // Bit Test +defm : Zn3WriteResInt; +defm : Zn3WriteResInt; + +defm : Zn3WriteResInt; // Bit Test + Set +defm : Zn3WriteResInt; +defm : Zn3WriteResInt; + +// Integer shifts and rotates. +defm : Zn3WriteResIntPair; +defm : Zn3WriteResIntPair; +defm : Zn3WriteResIntPair; + +def Zn3WriteRotateR1 : SchedWriteRes<[Zn3ALU12]> { + let Latency = 1; + let ResourceCycles = [2]; + let NumMicroOps = 1; +} +def : InstRW<[Zn3WriteRotateR1], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1, + RCR8r1, RCR16r1, RCR32r1, RCR64r1)>; + +def Zn3WriteRotateM1 : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU12]> { + let Latency = !add(Znver3Model.LoadLatency, Zn3WriteRotateR1.Latency); + let ResourceCycles = [1, 1, 2]; + let NumMicroOps = !add(Zn3WriteRotateR1.NumMicroOps, 1); +} +def : InstRW<[Zn3WriteRotateM1], (instrs RCL8m1, RCL16m1, RCL32m1, RCL64m1, + RCR8m1, RCR16m1, RCR32m1, RCR64m1)>; + +def Zn3WriteRotateRightRI : SchedWriteRes<[Zn3ALU12]> { + let Latency = 3; + let ResourceCycles = [6]; + let NumMicroOps = 7; +} +def : InstRW<[Zn3WriteRotateRightRI], (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>; + +def Zn3WriteRotateRightMI : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU12]> { + let Latency = !add(Znver3Model.LoadLatency, Zn3WriteRotateRightRI.Latency); + let ResourceCycles = [1, 1, 8]; + let NumMicroOps = !add(Zn3WriteRotateRightRI.NumMicroOps, 3); +} +def : InstRW<[Zn3WriteRotateRightMI], (instrs RCR8mi, RCR16mi, RCR32mi, RCR64mi)>; + +def Zn3WriteRotateLeftRI : 
SchedWriteRes<[Zn3ALU12]> { + let Latency = 4; + let ResourceCycles = [8]; + let NumMicroOps = 9; +} +def : InstRW<[Zn3WriteRotateLeftRI], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>; + +def Zn3WriteRotateLeftMI : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU12]> { + let Latency = !add(Znver3Model.LoadLatency, Zn3WriteRotateLeftRI.Latency); + let ResourceCycles = [1, 1, 8]; + let NumMicroOps = !add(Zn3WriteRotateLeftRI.NumMicroOps, 2); +} +def : InstRW<[Zn3WriteRotateLeftMI], (instrs RCL8mi, RCL16mi, RCL32mi, RCL64mi)>; + +defm : Zn3WriteResIntPair; + +def Zn3WriteRotateRightRCL : SchedWriteRes<[Zn3ALU12]> { + let Latency = 3; + let ResourceCycles = [6]; + let NumMicroOps = 7; +} +def : InstRW<[Zn3WriteRotateRightRCL], (instrs RCR8rCL, RCR16rCL, RCR32rCL, RCR64rCL)>; + +def Zn3WriteRotateRightMCL : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU12]> { + let Latency = !add(Znver3Model.LoadLatency, Zn3WriteRotateRightRCL.Latency); + let ResourceCycles = [1, 1, 8]; + let NumMicroOps = !add(Zn3WriteRotateRightRCL.NumMicroOps, 2); +} +def : InstRW<[Zn3WriteRotateRightMCL], (instrs RCR8mCL, RCR16mCL, RCR32mCL, RCR64mCL)>; + +def Zn3WriteRotateLeftRCL : SchedWriteRes<[Zn3ALU12]> { + let Latency = 4; + let ResourceCycles = [8]; + let NumMicroOps = 9; +} +def : InstRW<[Zn3WriteRotateLeftRCL], (instrs RCL8rCL, RCL16rCL, RCL32rCL, RCL64rCL)>; + +def Zn3WriteRotateLeftMCL : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3ALU12]> { + let Latency = !add(Znver3Model.LoadLatency, Zn3WriteRotateLeftRCL.Latency); + let ResourceCycles = [1, 1, 8]; + let NumMicroOps = !add(Zn3WriteRotateLeftRCL.NumMicroOps, 2); +} +def : InstRW<[Zn3WriteRotateLeftMCL], (instrs RCL8mCL, RCL16mCL, RCL32mCL, RCL64mCL)>; + +// Double shift instructions. +defm : Zn3WriteResInt; +defm : Zn3WriteResInt; +defm : Zn3WriteResInt; +defm : Zn3WriteResInt; + +// BMI1 BEXTR/BLS, BMI2 BZHI +defm : Zn3WriteResIntPair; +defm : Zn3WriteResIntPair; +defm : Zn3WriteResIntPair; + +// Idioms that clear a register, like xorps %xmm0, %xmm0. 
+// These can often bypass execution ports completely. +defm : Zn3WriteResInt; + +// Branches don't produce values, so they have no latency, but they still +// consume resources. Indirect branches can fold loads. +defm : Zn3WriteResIntPair; // FIXME: not from llvm-exegesis + +// Floating point. This covers both scalar and vector operations. +defm : Zn3WriteResInt; +defm : Zn3WriteResInt; +defm : Zn3WriteResInt; +defm : Zn3WriteResXMM; +defm : Zn3WriteResXMM; +defm : Zn3WriteResYMM; +defm : Zn3WriteResXMM; +defm : Zn3WriteResYMM; +defm : Zn3WriteResXMM; + +def Zn3WriteWriteFStoreMMX : SchedWriteRes<[Zn3FPSt, Zn3Store]> { + let Latency = 2; // FIXME: not from llvm-exegesis + let ResourceCycles = [1, 1]; + let NumMicroOps = 2; +} +def : InstRW<[Zn3WriteWriteFStoreMMX], (instrs MOVHPDmr, MOVHPSmr, + VMOVHPDmr, VMOVHPSmr)>; + +defm : Zn3WriteResXMM; +defm : Zn3WriteResYMM; +defm : Zn3WriteResXMM; +defm : Zn3WriteResXMM; +defm : Zn3WriteResYMM; + +defm : Zn3WriteResXMM; +defm : Zn3WriteResXMM; +defm : Zn3WriteResYMM; +defm : Zn3WriteResYMM; + +defm : Zn3WriteResXMM; +defm : Zn3WriteResXMM; +defm : Zn3WriteResYMM; + +defm : Zn3WriteResXMMPair; // Floating point add/sub. + +def Zn3WriteX87Arith : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> { + let Latency = !add(Znver3Model.LoadLatency, 1); // FIXME: not from llvm-exegesis + let ResourceCycles = [1, 1, 24]; + let NumMicroOps = 2; +} +def : InstRW<[Zn3WriteX87Arith], (instrs ADD_FI16m, ADD_FI32m, + SUB_FI16m, SUB_FI32m, + SUBR_FI16m, SUBR_FI32m, + MUL_FI16m, MUL_FI32m)>; + +def Zn3WriteX87Div : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> { + let Latency = !add(Znver3Model.LoadLatency, 1); // FIXME: not from llvm-exegesis + let ResourceCycles = [1, 1, 62]; + let NumMicroOps = 2; +} +def : InstRW<[Zn3WriteX87Div], (instrs DIV_FI16m, DIV_FI32m, + DIVR_FI16m, DIVR_FI32m)>; + +defm : Zn3WriteResXMMPair; // Floating point add/sub (XMM). +defm : Zn3WriteResYMMPair; // Floating point add/sub (YMM). 
+defm : X86WriteResPairUnsupported; // Floating point add/sub (ZMM). +defm : Zn3WriteResXMMPair; // Floating point double add/sub. +defm : Zn3WriteResXMMPair; // Floating point double add/sub (XMM). +defm : Zn3WriteResYMMPair; // Floating point double add/sub (YMM). +defm : X86WriteResPairUnsupported; // Floating point double add/sub (ZMM). +defm : Zn3WriteResXMMPair; // Floating point compare. +defm : Zn3WriteResXMMPair; // Floating point compare (XMM). +defm : Zn3WriteResYMMPair; // Floating point compare (YMM). +defm : X86WriteResPairUnsupported; // Floating point compare (ZMM). +defm : Zn3WriteResXMMPair; // Floating point double compare. +defm : Zn3WriteResXMMPair; // Floating point double compare (XMM). +defm : Zn3WriteResYMMPair; // Floating point double compare (YMM). +defm : X86WriteResPairUnsupported; // Floating point double compare (ZMM). +defm : Zn3WriteResXMMPair; // FIXME: latency not from llvm-exegesis // Floating point compare to flags (X87). +defm : Zn3WriteResXMMPair; // FIXME: latency not from llvm-exegesis // Floating point compare to flags (SSE). +defm : Zn3WriteResXMMPair; // Floating point multiplication. +defm : Zn3WriteResXMMPair; // Floating point multiplication (XMM). +defm : Zn3WriteResYMMPair; // Floating point multiplication (YMM). +defm : X86WriteResPairUnsupported; // Floating point multiplication (ZMM). +defm : Zn3WriteResXMMPair; // Floating point double multiplication. +defm : Zn3WriteResXMMPair; // Floating point double multiplication (XMM). +defm : Zn3WriteResYMMPair; // Floating point double multiplication (YMM). +defm : X86WriteResPairUnsupported; // Floating point double multiplication (ZMM). +defm : Zn3WriteResXMMPair; // Floating point division. +defm : Zn3WriteResXMMPair; // Floating point division (XMM). +defm : Zn3WriteResYMMPair; // Floating point division (YMM). +defm : X86WriteResPairUnsupported; // Floating point division (ZMM). +defm : Zn3WriteResXMMPair; // Floating point double division. 
+defm : Zn3WriteResXMMPair; // Floating point double division (XMM). +defm : Zn3WriteResYMMPair; // Floating point double division (YMM). +defm : X86WriteResPairUnsupported; // Floating point double division (ZMM). +defm : Zn3WriteResXMMPair; // Floating point square root. +defm : Zn3WriteResXMMPair; // Floating point square root (XMM). +defm : Zn3WriteResYMMPair; // Floating point square root (YMM). +defm : X86WriteResPairUnsupported; // Floating point square root (ZMM). +defm : Zn3WriteResXMMPair; // Floating point double square root. +defm : Zn3WriteResXMMPair; // Floating point double square root (XMM). +defm : Zn3WriteResYMMPair; // Floating point double square root (YMM). +defm : X86WriteResPairUnsupported; // Floating point double square root (ZMM). +defm : Zn3WriteResXMMPair; // FIXME: latency not from llvm-exegesis // Floating point long double square root. +defm : Zn3WriteResXMMPair; // Floating point reciprocal estimate. +defm : Zn3WriteResXMMPair; // Floating point reciprocal estimate (XMM). +defm : Zn3WriteResYMMPair; // Floating point reciprocal estimate (YMM). +defm : X86WriteResPairUnsupported; // Floating point reciprocal estimate (ZMM). +defm : Zn3WriteResXMMPair; // Floating point reciprocal square root estimate. +defm : Zn3WriteResXMMPair; // Floating point reciprocal square root estimate (XMM). +defm : Zn3WriteResYMMPair; // Floating point reciprocal square root estimate (YMM). +defm : X86WriteResPairUnsupported; // Floating point reciprocal square root estimate (ZMM). +defm : Zn3WriteResXMMPair; // Fused Multiply Add. +defm : Zn3WriteResXMMPair; // Fused Multiply Add (XMM). +defm : Zn3WriteResYMMPair; // Fused Multiply Add (YMM). +defm : X86WriteResPairUnsupported; // Fused Multiply Add (ZMM). +defm : Zn3WriteResXMMPair; // Floating point double dot product. +defm : Zn3WriteResXMMPair; // Floating point single dot product. +defm : Zn3WriteResYMMPair; // Floating point single dot product (YMM). 
+defm : X86WriteResPairUnsupported; // Floating point single dot product (ZMM). +defm : Zn3WriteResXMMPair; // FIXME: latency not from llvm-exegesis // Floating point fabs/fchs. +defm : Zn3WriteResXMMPair; // Floating point rounding. +defm : Zn3WriteResYMMPair; // Floating point rounding (YMM). +defm : X86WriteResPairUnsupported; // Floating point rounding (ZMM). +defm : Zn3WriteResXMMPair; // Floating point and/or/xor logicals. +defm : Zn3WriteResYMMPair; // Floating point and/or/xor logicals (YMM). +defm : X86WriteResPairUnsupported; // Floating point and/or/xor logicals (ZMM). +defm : Zn3WriteResXMMPair; // FIXME: latency not from llvm-exegesis // Floating point TEST instructions. +defm : Zn3WriteResYMMPair; // FIXME: latency not from llvm-exegesis // Floating point TEST instructions (YMM). +defm : X86WriteResPairUnsupported; // Floating point TEST instructions (ZMM). +defm : Zn3WriteResXMMPair; // Floating point vector shuffles. +defm : Zn3WriteResYMMPair; // Floating point vector shuffles (YMM). +defm : X86WriteResPairUnsupported; // Floating point vector shuffles (ZMM). +defm : Zn3WriteResXMMPair; // Floating point vector variable shuffles. +defm : Zn3WriteResYMMPair; // Floating point vector variable shuffles (YMM). +defm : X86WriteResPairUnsupported; // Floating point vector variable shuffles (ZMM). +defm : Zn3WriteResXMMPair; // Floating point vector blends. +defm : Zn3WriteResYMMPair; // Floating point vector blends (YMM). +defm : X86WriteResPairUnsupported; // Floating point vector blends (ZMM). +defm : Zn3WriteResXMMPair; // Fp vector variable blends. +defm : Zn3WriteResYMMPair; // Fp vector variable blends (YMM). +defm : X86WriteResPairUnsupported; // Fp vector variable blends (ZMM). 
+ +// Horizontal Add/Sub (float and integer) +defm : Zn3WriteResXMMPair; +defm : Zn3WriteResYMMPair; +defm : X86WriteResPairUnsupported; +defm : Zn3WriteResXMMPair; +defm : Zn3WriteResXMMPair; +defm : Zn3WriteResYMMPair; +defm : X86WriteResPairUnsupported; + +// Vector integer operations. +defm : Zn3WriteResXMM; +defm : Zn3WriteResXMM; +defm : Zn3WriteResYMM; +defm : Zn3WriteResXMM; +defm : Zn3WriteResYMM; +defm : Zn3WriteResXMM; +defm : Zn3WriteResYMM; +defm : Zn3WriteResXMM; +defm : Zn3WriteResXMM; + +def Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr : SchedWriteRes<[Zn3FPFMisc0]> { + let Latency = 4; + let ResourceCycles = [1]; + let NumMicroOps = 1; +} +def : InstRW<[Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr], (instrs VEXTRACTF128rr, VEXTRACTI128rr)>; + +def Zn3WriteVEXTRACTI128mr : SchedWriteRes<[Zn3FPFMisc0, Zn3FPSt, Zn3Store]> { + let Latency = !add(Znver3Model.LoadLatency, Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency); + let ResourceCycles = [1, 1, 1]; + let NumMicroOps = !add(Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.NumMicroOps, 1); +} +def : InstRW<[Zn3WriteVEXTRACTI128mr], (instrs VEXTRACTI128mr, VEXTRACTF128mr)>; + +def Zn3WriteVINSERTF128rmr : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPFMisc0]> { + let Latency = !add(Znver3Model.LoadLatency, Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency); + let ResourceCycles = [1, 1, 1]; + let NumMicroOps = !add(Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.NumMicroOps, 0); +} +def : InstRW<[Zn3WriteVINSERTF128rmr], (instrs VINSERTF128rm)>; + +defm : Zn3WriteResYMM; +defm : Zn3WriteResXMM; +defm : Zn3WriteResYMM; +defm : Zn3WriteResXMM; +defm : Zn3WriteResXMM; +defm : Zn3WriteResYMM; +defm : Zn3WriteResYMM; +defm : Zn3WriteResXMM; +defm : Zn3WriteResXMM; +defm : Zn3WriteResYMM; +defm : Zn3WriteResXMM; +defm : Zn3WriteResXMM; + +def Zn3WriteMOVMMX : SchedWriteRes<[Zn3FPLd01, Zn3FPFMisc0123]> { + let Latency = 1; + let ResourceCycles = [1, 2]; + let NumMicroOps = 2; +} +def : InstRW<[Zn3WriteMOVMMX], (instrs MMX_MOVQ2FR64rr, MMX_MOVQ2DQrr)>; + 
+def Zn3WriteMOVMMXSlow : SchedWriteRes<[Zn3FPLd01, Zn3FPFMisc0123]> { + let Latency = 1; + let ResourceCycles = [1, 4]; + let NumMicroOps = 2; +} +def : InstRW<[Zn3WriteMOVMMXSlow], (instrs MMX_MOVD64rr, MMX_MOVD64to64rr)>; + +defm : Zn3WriteResXMMPair; // Vector integer ALU op, no logicals. + +def Zn3WriteEXTRQ_INSERTQ : SchedWriteRes<[Zn3FPVShuf01, Zn3FPLd01]> { + let Latency = 3; + let ResourceCycles = [1, 1]; + let NumMicroOps = 1; +} +def : InstRW<[Zn3WriteEXTRQ_INSERTQ], (instrs EXTRQ, INSERTQ)>; + +def Zn3WriteEXTRQI_INSERTQI : SchedWriteRes<[Zn3FPVShuf01, Zn3FPLd01]> { + let Latency = 3; + let ResourceCycles = [1, 1]; + let NumMicroOps = 2; +} +def : InstRW<[Zn3WriteEXTRQI_INSERTQI], (instrs EXTRQI, INSERTQI)>; + +defm : Zn3WriteResXMMPair; // Vector integer ALU op, no logicals (XMM). + +def Zn3WriteVecALUXSlow : SchedWriteRes<[Zn3FPVAdd01]> { + let Latency = 1; + let ResourceCycles = [1]; + let NumMicroOps = 1; +} +def : InstRW<[Zn3WriteVecALUXSlow], (instrs PABSBrr, PABSDrr, PABSWrr, + PADDSBrr, PADDSWrr, PADDUSBrr, PADDUSWrr, + PAVGBrr, PAVGWrr, + PSIGNBrr, PSIGNDrr, PSIGNWrr, + VPABSBrr, VPABSDrr, VPABSWrr, + VPADDSBrr, VPADDSWrr, VPADDUSBrr, VPADDUSWrr, + VPAVGBrr, VPAVGWrr, + VPCMPEQQrr, + VPSIGNBrr, VPSIGNDrr, VPSIGNWrr, + PSUBSBrr, PSUBSWrr, PSUBUSBrr, PSUBUSWrr, VPSUBSBrr, VPSUBSWrr, VPSUBUSBrr, VPSUBUSWrr)>; + +def Zn3WriteVecALUXMMX : SchedWriteRes<[Zn3FPVAdd01]> { + let Latency = 1; + let ResourceCycles = [1]; + let NumMicroOps = 1; +} +def : InstRW<[Zn3WriteVecALUXMMX], (instrs MMX_PABSBrr, MMX_PABSDrr, MMX_PABSWrr, + MMX_PSIGNBrr, MMX_PSIGNDrr, MMX_PSIGNWrr, + MMX_PADDSBirr, MMX_PADDSWirr, MMX_PADDUSBirr, MMX_PADDUSWirr, + MMX_PAVGBirr, MMX_PAVGWirr, + MMX_PSUBSBirr, MMX_PSUBSWirr, MMX_PSUBUSBirr, MMX_PSUBUSWirr)>; + +defm : Zn3WriteResYMMPair; // Vector integer ALU op, no logicals (YMM). 
+ +def Zn3WriteVecALUYSlow : SchedWriteRes<[Zn3FPVAdd01]> { + let Latency = 1; + let ResourceCycles = [1]; + let NumMicroOps = 1; +} +def : InstRW<[Zn3WriteVecALUYSlow], (instrs VPABSBYrr, VPABSDYrr, VPABSWYrr, + VPADDSBYrr, VPADDSWYrr, VPADDUSBYrr, VPADDUSWYrr, + VPSUBSBYrr, VPSUBSWYrr, VPSUBUSBYrr, VPSUBUSWYrr, + VPAVGBYrr, VPAVGWYrr, + VPCMPEQQYrr, + VPSIGNBYrr, VPSIGNDYrr, VPSIGNWYrr)>; + +defm : X86WriteResPairUnsupported; // Vector integer ALU op, no logicals (ZMM). +defm : Zn3WriteResXMMPair; // Vector integer and/or/xor logicals. +defm : Zn3WriteResXMMPair; // Vector integer and/or/xor logicals (XMM). +defm : Zn3WriteResYMMPair; // Vector integer and/or/xor logicals (YMM). +defm : X86WriteResPairUnsupported; // Vector integer and/or/xor logicals (ZMM). +defm : Zn3WriteResXMMPair; // FIXME: latency not from llvm-exegesis // Vector integer TEST instructions. +defm : Zn3WriteResYMMPair; // FIXME: latency not from llvm-exegesis // Vector integer TEST instructions (YMM). +defm : X86WriteResPairUnsupported; // Vector integer TEST instructions (ZMM). +defm : Zn3WriteResXMMPair; // Vector integer shifts (default). +defm : Zn3WriteResXMMPair; // Vector integer shifts (XMM). +defm : Zn3WriteResYMMPair; // Vector integer shifts (YMM). +defm : X86WriteResPairUnsupported; // Vector integer shifts (ZMM). +defm : Zn3WriteResXMMPair; // Vector integer immediate shifts (default). +defm : Zn3WriteResXMMPair; // Vector integer immediate shifts (XMM). +defm : Zn3WriteResYMMPair; // Vector integer immediate shifts (YMM). +defm : X86WriteResPairUnsupported; // Vector integer immediate shifts (ZMM). +defm : Zn3WriteResXMMPair; // Vector integer multiply (default). +defm : Zn3WriteResXMMPair; // Vector integer multiply (XMM). +defm : Zn3WriteResYMMPair; // Vector integer multiply (YMM). +defm : X86WriteResPairUnsupported; // Vector integer multiply (ZMM). +defm : Zn3WriteResXMMPair; // Vector PMULLD. +defm : Zn3WriteResYMMPair; // Vector PMULLD (YMM). 
+defm : X86WriteResPairUnsupported; // Vector PMULLD (ZMM). +defm : Zn3WriteResXMMPair; // Vector shuffles. +defm : Zn3WriteResXMMPair; // Vector shuffles (XMM). +defm : Zn3WriteResYMMPair; // Vector shuffles (YMM). +defm : X86WriteResPairUnsupported; // Vector shuffles (ZMM). +defm : Zn3WriteResXMMPair; // Vector variable shuffles. +defm : Zn3WriteResXMMPair; // Vector variable shuffles (XMM). +defm : Zn3WriteResYMMPair; // Vector variable shuffles (YMM). +defm : X86WriteResPairUnsupported; // Vector variable shuffles (ZMM). +defm : Zn3WriteResXMMPair; // Vector blends. +defm : Zn3WriteResYMMPair; // Vector blends (YMM). +defm : X86WriteResPairUnsupported; // Vector blends (ZMM). +defm : Zn3WriteResXMMPair; // Vector variable blends. +defm : Zn3WriteResYMMPair; // Vector variable blends (YMM). +defm : X86WriteResPairUnsupported; // Vector variable blends (ZMM). +defm : Zn3WriteResXMMPair; // Vector PSADBW. +defm : Zn3WriteResXMMPair; // Vector PSADBW (XMM). +defm : Zn3WriteResYMMPair; // Vector PSADBW (YMM). +defm : X86WriteResPairUnsupported; // Vector PSADBW (ZMM). +defm : Zn3WriteResXMMPair; // Vector MPSAD. +defm : Zn3WriteResYMMPair; // Vector MPSAD (YMM). +defm : X86WriteResPairUnsupported; // Vector MPSAD (ZMM). +defm : Zn3WriteResXMMPair; // Vector PHMINPOS. + +// Vector insert/extract operations. +defm : Zn3WriteResXMMPair; // Insert gpr to vector element. +defm : Zn3WriteResXMM; // Extract vector element to gpr. +defm : Zn3WriteResXMM; // Extract vector element and store. + +// MOVMSK operations. +defm : Zn3WriteResXMM; +defm : Zn3WriteResXMM; +defm : Zn3WriteResYMM; +defm : Zn3WriteResXMM; + +// Conversion between integer and float. +defm : Zn3WriteResXMMPair; // Double -> Integer. +defm : Zn3WriteResXMMPair; // Double -> Integer (XMM). +defm : Zn3WriteResYMMPair; // Double -> Integer (YMM). +defm : X86WriteResPairUnsupported; // Double -> Integer (ZMM). 
+ +def Zn3WriteCvtPD2IMMX : SchedWriteRes<[Zn3FPFCvt01]> { + let Latency = 1; + let ResourceCycles = [2]; + let NumMicroOps = 2; +} +def : InstRW<[Zn3WriteCvtPD2IMMX], (instrs MMX_CVTPD2PIirm, MMX_CVTTPD2PIirm, MMX_CVTPD2PIirr, MMX_CVTTPD2PIirr)>; + +defm : Zn3WriteResXMMPair; // Float -> Integer. + +defm : Zn3WriteResXMMPair; // Float -> Integer (XMM). +defm : Zn3WriteResYMMPair; // Float -> Integer (YMM). +defm : X86WriteResPairUnsupported; // Float -> Integer (ZMM). + +defm : Zn3WriteResXMMPair; // Integer -> Double. +defm : Zn3WriteResXMMPair; // Integer -> Double (XMM). +defm : Zn3WriteResYMMPair; // Integer -> Double (YMM). +defm : X86WriteResPairUnsupported; // Integer -> Double (ZMM). + +def Zn3WriteCvtI2PDMMX : SchedWriteRes<[Zn3FPFCvt01]> { + let Latency = 2; + let ResourceCycles = [6]; + let NumMicroOps = 2; +} +def : InstRW<[Zn3WriteCvtI2PDMMX], (instrs MMX_CVTPI2PDirm, MMX_CVTPI2PDirr)>; + +defm : Zn3WriteResXMMPair; // Integer -> Float. +defm : Zn3WriteResXMMPair; // Integer -> Float (XMM). +defm : Zn3WriteResYMMPair; // Integer -> Float (YMM). +defm : X86WriteResPairUnsupported; // Integer -> Float (ZMM). + +def Zn3WriteCvtI2PSMMX : SchedWriteRes<[Zn3FPFCvt01]> { + let Latency = 3; + let ResourceCycles = [1]; + let NumMicroOps = 2; +} +def : InstRW<[Zn3WriteCvtI2PSMMX], (instrs MMX_CVTPI2PSirr)>; + +defm : Zn3WriteResXMMPair; // Float -> Double size conversion. +defm : Zn3WriteResXMMPair; // Float -> Double size conversion (XMM). +defm : Zn3WriteResYMMPair; // Float -> Double size conversion (YMM). +defm : X86WriteResPairUnsupported; // Float -> Double size conversion (ZMM). + +defm : Zn3WriteResXMMPair; // Double -> Float size conversion. +defm : Zn3WriteResXMMPair; // Double -> Float size conversion (XMM). +defm : Zn3WriteResYMMPair; // Double -> Float size conversion (YMM). +defm : X86WriteResPairUnsupported; // Double -> Float size conversion (ZMM). + +defm : Zn3WriteResXMMPair; // Half -> Float size conversion. 
+defm : Zn3WriteResYMMPair; // Half -> Float size conversion (YMM). +defm : X86WriteResPairUnsupported; // Half -> Float size conversion (ZMM). + +defm : Zn3WriteResXMM; // Float -> Half size conversion. +defm : Zn3WriteResYMM; // Float -> Half size conversion (YMM). +defm : X86WriteResUnsupported; // Float -> Half size conversion (ZMM). +defm : Zn3WriteResXMM; // Float -> Half + store size conversion. +defm : Zn3WriteResYMM; // Float -> Half + store size conversion (YMM). +defm : X86WriteResUnsupported; // Float -> Half + store size conversion (ZMM). + +// CRC32 instruction. +defm : Zn3WriteResIntPair; + +def Zn3WriteSHA1MSG1rr : SchedWriteRes<[Zn3FPU0123]> { + let Latency = 2; + let ResourceCycles = [2]; + let NumMicroOps = 2; +} +def : InstRW<[Zn3WriteSHA1MSG1rr], (instrs SHA1MSG1rr)>; + +def Zn3WriteSHA1MSG1rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> { + let Latency = !add(Znver3Model.LoadLatency, Zn3WriteSHA1MSG1rr.Latency); + let ResourceCycles = [1, 1, 2]; + let NumMicroOps = !add(Zn3WriteSHA1MSG1rr.NumMicroOps, 0); +} +def : InstRW<[Zn3WriteSHA1MSG1rm], (instrs SHA1MSG1rm)>; + +def Zn3WriteSHA1MSG2rr_SHA1NEXTErr : SchedWriteRes<[Zn3FPU0123]> { + let Latency = 1; + let ResourceCycles = [2]; + let NumMicroOps = 1; +} +def : InstRW<[Zn3WriteSHA1MSG2rr_SHA1NEXTErr], (instrs SHA1MSG2rr, SHA1NEXTErr)>; + +def Zn3Writerm_SHA1MSG2rm_SHA1NEXTErm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> { + let Latency = !add(Znver3Model.LoadLatency, Zn3WriteSHA1MSG2rr_SHA1NEXTErr.Latency); + let ResourceCycles = [1, 1, 2]; + let NumMicroOps = !add(Zn3WriteSHA1MSG2rr_SHA1NEXTErr.NumMicroOps, 0); +} +def : InstRW<[Zn3Writerm_SHA1MSG2rm_SHA1NEXTErm], (instrs SHA1MSG2rm, SHA1NEXTErm)>; + +def Zn3WriteSHA256MSG1rr : SchedWriteRes<[Zn3FPU0123]> { + let Latency = 2; + let ResourceCycles = [3]; + let NumMicroOps = 2; +} +def : InstRW<[Zn3WriteSHA256MSG1rr], (instrs SHA256MSG1rr)>; + +def Zn3Writerm_SHA256MSG1rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> { + let 
Latency = !add(Znver3Model.LoadLatency, Zn3WriteSHA256MSG1rr.Latency); + let ResourceCycles = [1, 1, 3]; + let NumMicroOps = !add(Zn3WriteSHA256MSG1rr.NumMicroOps, 0); +} +def : InstRW<[Zn3Writerm_SHA256MSG1rm], (instrs SHA256MSG1rm)>; + +def Zn3WriteSHA256MSG2rr : SchedWriteRes<[Zn3FPU0123]> { + let Latency = 3; + let ResourceCycles = [8]; + let NumMicroOps = 4; +} +def : InstRW<[Zn3WriteSHA256MSG2rr], (instrs SHA256MSG2rr)>; + +def Zn3WriteSHA256MSG2rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> { + let Latency = !add(Znver3Model.LoadLatency, Zn3WriteSHA256MSG2rr.Latency); + let ResourceCycles = [1, 1, 8]; + let NumMicroOps = !add(Zn3WriteSHA256MSG2rr.NumMicroOps, 1); +} +def : InstRW<[Zn3WriteSHA256MSG2rm], (instrs SHA256MSG2rm)>; + +def Zn3WriteSHA1RNDS4rri : SchedWriteRes<[Zn3FPU0123]> { + let Latency = 6; + let ResourceCycles = [8]; + let NumMicroOps = 1; +} +def : InstRW<[Zn3WriteSHA1RNDS4rri], (instrs SHA1RNDS4rri)>; + +def Zn3WriteSHA256RNDS2rr : SchedWriteRes<[Zn3FPU0123]> { + let Latency = 4; + let ResourceCycles = [8]; + let NumMicroOps = 1; +} +def : InstRW<[Zn3WriteSHA256RNDS2rr], (instrs SHA256RNDS2rr)>; + +// Strings instructions. +// Packed Compare Implicit Length Strings, Return Mask +defm : Zn3WriteResXMMPair; +// Packed Compare Explicit Length Strings, Return Mask +defm : Zn3WriteResXMMPair; +// Packed Compare Implicit Length Strings, Return Index +defm : Zn3WriteResXMMPair; +// Packed Compare Explicit Length Strings, Return Index +defm : Zn3WriteResXMMPair; + +// AES instructions. +defm : Zn3WriteResXMMPair; // Decryption, encryption. +defm : Zn3WriteResXMMPair; // InvMixColumn. +defm : Zn3WriteResXMMPair; // Key Generation. + +// Carry-less multiplication instructions. 
+defm : Zn3WriteResXMMPair; + +// EMMS/FEMMS +defm : Zn3WriteResInt; // FIXME: latency not from llvm-exegesis + +// Load/store MXCSR +defm : Zn3WriteResInt; // FIXME: latency not from llvm-exegesis +defm : Zn3WriteResInt; // FIXME: latency not from llvm-exegesis + +// Catch-all for expensive system instructions. +defm : Zn3WriteResInt; + +def Zn3WriteVZEROUPPER : SchedWriteRes<[Zn3FPU0123]> { + let Latency = 0; // FIXME: not from llvm-exegesis + let ResourceCycles = [1]; + let NumMicroOps = 1; +} +def : InstRW<[Zn3WriteVZEROUPPER], (instrs VZEROUPPER)>; + +def Zn3WriteVZEROALL : SchedWriteRes<[Zn3FPU0123]> { + let Latency = 10; // FIXME: not from llvm-exegesis + let ResourceCycles = [24]; + let NumMicroOps = 18; +} +def : InstRW<[Zn3WriteVZEROALL], (instrs VZEROALL)>; + +// AVX2. +defm : Zn3WriteResYMMPair; // Fp 256-bit width vector shuffles. +defm : Zn3WriteResYMMPair; // Fp 256-bit width variable shuffles. +defm : Zn3WriteResYMMPair; // 256-bit width vector shuffles. + +def Zn3WriteVPERM2I128rr_VPERM2F128rr : SchedWriteRes<[Zn3FPVShuf]> { + let Latency = 3; + let ResourceCycles = [1]; + let NumMicroOps = 1; +} +def : InstRW<[Zn3WriteVPERM2I128rr_VPERM2F128rr], (instrs VPERM2I128rr, VPERM2F128rr)>; + +def Zn3WriteVPERM2F128rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPVShuf]> { + let Latency = !add(Znver3Model.LoadLatency, Zn3WriteVPERM2I128rr_VPERM2F128rr.Latency); + let ResourceCycles = [1, 1, 1]; + let NumMicroOps = !add(Zn3WriteVPERM2I128rr_VPERM2F128rr.NumMicroOps, 0); +} +def : InstRW<[Zn3WriteVPERM2F128rm], (instrs VPERM2F128rm)>; + +def Zn3WriteVPERMPSYrr : SchedWriteRes<[Zn3FPVShuf]> { + let Latency = 7; + let ResourceCycles = [1]; + let NumMicroOps = 2; +} +def : InstRW<[Zn3WriteVPERMPSYrr], (instrs VPERMPSYrr)>; + +def Zn3WriteVPERMPSYrm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPVShuf]> { + let Latency = !add(Znver3Model.LoadLatency, Zn3WriteVPERMPSYrr.Latency); + let ResourceCycles = [1, 1, 2]; + let NumMicroOps = !add(Zn3WriteVPERMPSYrr.NumMicroOps, 
1); +} +def : InstRW<[Zn3WriteVPERMPSYrm], (instrs VPERMPSYrm)>; + +def Zn3WriteVPERMYri : SchedWriteRes<[Zn3FPVShuf]> { + let Latency = 6; + let ResourceCycles = [1]; + let NumMicroOps = 2; +} +def : InstRW<[Zn3WriteVPERMYri], (instrs VPERMPDYri, VPERMQYri)>; + +def Zn3WriteVPERMPDYmi : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPVShuf]> { + let Latency = !add(Znver3Model.LoadLatency, Zn3WriteVPERMYri.Latency); + let ResourceCycles = [1, 1, 2]; + let NumMicroOps = !add(Zn3WriteVPERMYri.NumMicroOps, 1); +} +def : InstRW<[Zn3WriteVPERMPDYmi], (instrs VPERMPDYmi)>; + +def Zn3WriteVPERMDYrr : SchedWriteRes<[Zn3FPVShuf]> { + let Latency = 5; + let ResourceCycles = [1]; + let NumMicroOps = 2; +} +def : InstRW<[Zn3WriteVPERMDYrr], (instrs VPERMDYrr)>; + +def Zn3WriteVPERMYm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPVShuf]> { + let Latency = !add(Znver3Model.LoadLatency, Zn3WriteVPERMDYrr.Latency); + let ResourceCycles = [1, 1, 2]; + let NumMicroOps = !add(Zn3WriteVPERMDYrr.NumMicroOps, 0); +} +def : InstRW<[Zn3WriteVPERMYm], (instrs VPERMQYmi, VPERMDYrm)>; + +defm : Zn3WriteResYMMPair; // 256-bit width packed vector width-changing move. +defm : Zn3WriteResYMMPair; // 256-bit width vector variable shuffles. +defm : Zn3WriteResXMMPair; // Variable vector shifts. +defm : Zn3WriteResYMMPair; // Variable vector shifts (YMM). +defm : X86WriteResPairUnsupported; // Variable vector shifts (ZMM). + +// Old microcoded instructions that nobody use. +defm : Zn3WriteResInt; + +// Fence instructions. +defm : Zn3WriteResInt; + +def Zn3WriteLFENCE : SchedWriteRes<[Zn3LSU]> { + let Latency = 1; + let ResourceCycles = [30]; + let NumMicroOps = 1; +} +def : InstRW<[Zn3WriteLFENCE], (instrs LFENCE)>; + +def Zn3WriteSFENCE : SchedWriteRes<[Zn3LSU]> { + let Latency = 1; + let ResourceCycles = [1]; + let NumMicroOps = 1; +} +def : InstRW<[Zn3WriteSFENCE], (instrs SFENCE)>; + +// Nop, not very useful expect it provides a model for nops! 
+defm : Zn3WriteResInt; // FIXME: latency not from llvm-exegesis + +} // SchedModel diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -2101,15 +2101,10 @@ } else if (auto *StInst = dyn_cast(Storage)) { Storage = StInst->getOperand(0); } else if (auto *GEPInst = dyn_cast(Storage)) { - SmallVector AdditionalValues; - DIExpression *SalvagedExpr = llvm::salvageDebugInfoImpl( - *GEPInst, Expr, - /*WithStackValue=*/false, 0, AdditionalValues); - // Debug declares cannot currently handle additional location - // operands. - if (!SalvagedExpr || !AdditionalValues.empty()) - break; - Expr = SalvagedExpr; + Expr = llvm::salvageDebugInfoImpl(*GEPInst, Expr, + /*WithStackValue=*/false, 0); + if (!Expr) + return; Storage = GEPInst->getOperand(0); } else if (auto *BCInst = dyn_cast(Storage)) Storage = BCInst->getOperand(0); diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1565,9 +1565,6 @@ if (!LoopPredication) return false; - if (!SE->hasLoopInvariantBackedgeTakenCount(L)) - return false; - // Note: ExactBTC is the exact backedge taken count *iff* the loop exits // through *explicit* control flow. We have to eliminate the possibility of // implicit exits (see below) before we know it's truly exact. 
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -1739,26 +1739,17 @@ is_contained(DIILocation, &I) && "DbgVariableIntrinsic must use salvaged instruction as its location"); unsigned LocNo = std::distance(DIILocation.begin(), find(DIILocation, &I)); - SmallVector AdditionalValues; - DIExpression *SalvagedExpr = salvageDebugInfoImpl( - I, DII->getExpression(), StackValue, LocNo, AdditionalValues); + + DIExpression *DIExpr = + salvageDebugInfoImpl(I, DII->getExpression(), StackValue, LocNo); // salvageDebugInfoImpl should fail on examining the first element of // DbgUsers, or none of them. - if (!SalvagedExpr) + if (!DIExpr) break; DII->replaceVariableLocationOp(&I, I.getOperand(0)); - if (AdditionalValues.empty()) { - DII->setExpression(SalvagedExpr); - } else if (isa(DII)) { - DII->addVariableLocationOps(AdditionalValues, SalvagedExpr); - } else { - // Do not salvage using DIArgList for dbg.addr/dbg.declare, as it is - // currently only valid for stack value expressions. - Value *Undef = UndefValue::get(I.getOperand(0)->getType()); - DII->replaceVariableLocationOp(I.getOperand(0), Undef); - } + DII->setExpression(DIExpr); LLVM_DEBUG(dbgs() << "SALVAGE: " << *DII << '\n'); Salvaged = true; } @@ -1773,27 +1764,12 @@ } bool getSalvageOpsForGEP(GetElementPtrInst *GEP, const DataLayout &DL, - uint64_t CurrentLocOps, - SmallVectorImpl &Opcodes, - SmallVectorImpl &AdditionalValues) { + SmallVectorImpl &Opcodes) { unsigned BitWidth = DL.getIndexSizeInBits(GEP->getPointerAddressSpace()); - // Rewrite a GEP into a DIExpression. - SmallDenseMap VariableOffsets; + // Rewrite a constant GEP into a DIExpression. 
APInt ConstantOffset(BitWidth, 0); - if (!GEP->collectOffset(DL, BitWidth, VariableOffsets, ConstantOffset)) + if (!GEP->accumulateConstantOffset(DL, ConstantOffset)) return false; - if (!VariableOffsets.empty() && !CurrentLocOps) { - Opcodes.insert(Opcodes.begin(), {dwarf::DW_OP_LLVM_arg, 0}); - CurrentLocOps = 1; - } - for (auto Offset : VariableOffsets) { - AdditionalValues.push_back(Offset.first); - assert(Offset.second.isStrictlyPositive() && - "Expected strictly positive multiplier for offset."); - Opcodes.append({dwarf::DW_OP_LLVM_arg, CurrentLocOps++, dwarf::DW_OP_constu, - Offset.second.getZExtValue(), dwarf::DW_OP_mul, - dwarf::DW_OP_plus}); - } DIExpression::appendOffset(Opcodes, ConstantOffset.getSExtValue()); return true; } @@ -1828,35 +1804,23 @@ } } -bool getSalvageOpsForBinOp(BinaryOperator *BI, uint64_t CurrentLocOps, - SmallVectorImpl &Opcodes, - SmallVectorImpl &AdditionalValues) { - // Handle binary operations with constant integer operands as a special case. +bool getSalvageOpsForBinOp(BinaryOperator *BI, + SmallVectorImpl &Opcodes) { + // Rewrite binary operations with constant integer operands. auto *ConstInt = dyn_cast(BI->getOperand(1)); - // Values wider than 64 bits cannot be represented within a DIExpression. - if (ConstInt && ConstInt->getBitWidth() > 64) + if (!ConstInt || ConstInt->getBitWidth() > 64) return false; - + uint64_t Val = ConstInt->getSExtValue(); Instruction::BinaryOps BinOpcode = BI->getOpcode(); - // Push any Constant Int operand onto the expression stack. - if (ConstInt) { - uint64_t Val = ConstInt->getSExtValue(); - // Add or Sub Instructions with a constant operand can potentially be - // simplified. - if (BinOpcode == Instruction::Add || BinOpcode == Instruction::Sub) { - uint64_t Offset = BinOpcode == Instruction::Add ? 
Val : -int64_t(Val); - DIExpression::appendOffset(Opcodes, Offset); - return true; - } - Opcodes.append({dwarf::DW_OP_constu, Val}); - } else { - if (!CurrentLocOps) { - Opcodes.append({dwarf::DW_OP_LLVM_arg, 0}); - CurrentLocOps = 1; - } - Opcodes.append({dwarf::DW_OP_LLVM_arg, CurrentLocOps}); - AdditionalValues.push_back(BI->getOperand(1)); + // Add or Sub Instructions with a constant operand can potentially be + // simplified. + if (BinOpcode == Instruction::Add || BinOpcode == Instruction::Sub) { + uint64_t Offset = BinOpcode == Instruction::Add ? Val : -int64_t(Val); + DIExpression::appendOffset(Opcodes, Offset); + return true; } + // Add constant int operand to expression stack. + Opcodes.append({dwarf::DW_OP_constu, Val}); // Add salvaged binary operator to expression stack, if it has a valid // representation in a DIExpression. @@ -1868,11 +1832,9 @@ return true; } -DIExpression * -llvm::salvageDebugInfoImpl(Instruction &I, DIExpression *SrcDIExpr, - bool WithStackValue, unsigned LocNo, - SmallVectorImpl &AdditionalValues) { - uint64_t CurrentLocOps = SrcDIExpr->getNumLocationOperands(); +DIExpression *llvm::salvageDebugInfoImpl(Instruction &I, + DIExpression *SrcDIExpr, + bool WithStackValue, unsigned LocNo) { auto &M = *I.getModule(); auto &DL = M.getDataLayout(); @@ -1886,7 +1848,7 @@ }; // initializer-list helper for applying operators to the source DIExpression. 
- auto applyOps = [&](ArrayRef Opcodes) { + auto applyOps = [&](ArrayRef Opcodes) -> DIExpression * { SmallVector Ops(Opcodes.begin(), Opcodes.end()); return doSalvage(Ops); }; @@ -1912,15 +1874,15 @@ SmallVector Ops; if (auto *GEP = dyn_cast(&I)) { - if (getSalvageOpsForGEP(GEP, DL, CurrentLocOps, Ops, AdditionalValues)) + if (getSalvageOpsForGEP(GEP, DL, Ops)) return doSalvage(Ops); } else if (auto *BI = dyn_cast(&I)) { - if (getSalvageOpsForBinOp(BI, CurrentLocOps, Ops, AdditionalValues)) + if (getSalvageOpsForBinOp(BI, Ops)) return doSalvage(Ops); } - // *Not* to do: we should not attempt to salvage load instructions, - // because the validity and lifetime of a dbg.value containing - // DW_OP_deref becomes difficult to analyze. See PR40628 for examples. + // *Not* to do: we should not attempt to salvage load instructions, + // because the validity and lifetime of a dbg.value containing + // DW_OP_deref becomes difficult to analyze. See PR40628 for examples. return nullptr; } diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll --- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll +++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll @@ -483,6 +483,150 @@ ret void } +define void @test_guard_if_and_enter(i32* nocapture readonly %data, i64 %count) { +; CHECK-LABEL: 'test_guard_if_and_enter' +; CHECK-NEXT: Classifying expressions for: @test_guard_if_and_enter +; CHECK-NEXT: %cmp.and = and i1 %cmp.ult, %cmp.ne +; CHECK-NEXT: --> %cmp.and U: full-set S: full-set +; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] +; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,-1) S: [0,-1) Exits: (-1 + %count) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %idx = getelementptr inbounds i32, i32* %data, i64 %iv +; CHECK-NEXT: --> {%data,+,4}<%loop> U: full-set S: full-set Exits: (-4 + (4 * %count) + %data) 
LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %iv.next = add nuw i64 %iv, 1 +; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,0) S: [1,0) Exits: %count LoopDispositions: { %loop: Computable } +; CHECK-NEXT: Determining loop execution counts for: @test_guard_if_and_enter +; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + %count) +; CHECK-NEXT: Loop %loop: max backedge-taken count is -2 +; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is (-1 + %count) +; CHECK-NEXT: Predicates: +; CHECK: Loop %loop: Trip multiple is 1 +; +entry: + %cmp.ult = icmp ult i64 %count, 5 + %cmp.ne = icmp ne i64 %count, 0 + %cmp.and = and i1 %cmp.ult, %cmp.ne + br i1 %cmp.and, label %loop, label %exit + +loop: + %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] + %idx = getelementptr inbounds i32, i32* %data, i64 %iv + store i32 1, i32* %idx, align 4 + %iv.next = add nuw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %count + br i1 %exitcond.not, label %exit, label %loop + +exit: + ret void +} + +define void @test_guard_if_and_skip(i32* nocapture readonly %data, i64 %count) { +; CHECK-LABEL: 'test_guard_if_and_skip' +; CHECK-NEXT: Classifying expressions for: @test_guard_if_and_skip +; CHECK-NEXT: %cmp.and = and i1 %cmp.ult, %cmp.ne +; CHECK-NEXT: --> %cmp.and U: full-set S: full-set +; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] +; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (-1 + %count) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %idx = getelementptr inbounds i32, i32* %data, i64 %iv +; CHECK-NEXT: --> {%data,+,4}<%loop> U: full-set S: full-set Exits: (-4 + (4 * %count) + %data) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %iv.next = add nuw i64 %iv, 1 +; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,0) S: [1,0) Exits: %count LoopDispositions: { %loop: Computable } +; CHECK-NEXT: Determining loop execution counts for: @test_guard_if_and_skip +; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + %count) +; CHECK-NEXT: Loop 
%loop: max backedge-taken count is -1 +; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is (-1 + %count) +; CHECK-NEXT: Predicates: +; CHECK: Loop %loop: Trip multiple is 1 +; +entry: + %cmp.ult = icmp ult i64 %count, 5 + %cmp.ne = icmp ne i64 %count, 0 + %cmp.and = and i1 %cmp.ult, %cmp.ne + br i1 %cmp.and, label %exit, label %loop + +loop: + %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] + %idx = getelementptr inbounds i32, i32* %data, i64 %iv + store i32 1, i32* %idx, align 4 + %iv.next = add nuw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %count + br i1 %exitcond.not, label %exit, label %loop + +exit: + ret void +} + +define void @test_guard_if_or_skip(i32* nocapture readonly %data, i64 %count) { +; CHECK-LABEL: 'test_guard_if_or_skip' +; CHECK-NEXT: Classifying expressions for: @test_guard_if_or_skip +; CHECK-NEXT: %cmp.or = or i1 %cmp.uge, %cmp.eq +; CHECK-NEXT: --> %cmp.or U: full-set S: full-set +; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] +; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,-1) S: [0,-1) Exits: (-1 + %count) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %idx = getelementptr inbounds i32, i32* %data, i64 %iv +; CHECK-NEXT: --> {%data,+,4}<%loop> U: full-set S: full-set Exits: (-4 + (4 * %count) + %data) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %iv.next = add nuw i64 %iv, 1 +; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,0) S: [1,0) Exits: %count LoopDispositions: { %loop: Computable } +; CHECK-NEXT: Determining loop execution counts for: @test_guard_if_or_skip +; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + %count) +; CHECK-NEXT: Loop %loop: max backedge-taken count is -2 +; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is (-1 + %count) +; CHECK-NEXT: Predicates: +; CHECK: Loop %loop: Trip multiple is 1 +; +entry: + %cmp.uge = icmp uge i64 %count, 5 + %cmp.eq = icmp eq i64 %count, 0 + %cmp.or = or i1 %cmp.uge, %cmp.eq + br i1 %cmp.or, label %exit, label %loop + +loop: + %iv = phi i64 
[ %iv.next, %loop ], [ 0, %entry ] + %idx = getelementptr inbounds i32, i32* %data, i64 %iv + store i32 1, i32* %idx, align 4 + %iv.next = add nuw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %count + br i1 %exitcond.not, label %exit, label %loop + +exit: + ret void +} + +define void @test_guard_if_or_enter(i32* nocapture readonly %data, i64 %count) { +; CHECK-LABEL: 'test_guard_if_or_enter' +; CHECK-NEXT: Classifying expressions for: @test_guard_if_or_enter +; CHECK-NEXT: %cmp.or = or i1 %cmp.uge, %cmp.eq +; CHECK-NEXT: --> %cmp.or U: full-set S: full-set +; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] +; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (-1 + %count) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %idx = getelementptr inbounds i32, i32* %data, i64 %iv +; CHECK-NEXT: --> {%data,+,4}<%loop> U: full-set S: full-set Exits: (-4 + (4 * %count) + %data) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %iv.next = add nuw i64 %iv, 1 +; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,0) S: [1,0) Exits: %count LoopDispositions: { %loop: Computable } +; CHECK-NEXT: Determining loop execution counts for: @test_guard_if_or_enter +; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + %count) +; CHECK-NEXT: Loop %loop: max backedge-taken count is -1 +; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is (-1 + %count) +; CHECK-NEXT: Predicates: +; CHECK: Loop %loop: Trip multiple is 1 +; +entry: + %cmp.uge = icmp uge i64 %count, 5 + %cmp.eq = icmp eq i64 %count, 0 + %cmp.or = or i1 %cmp.uge, %cmp.eq + br i1 %cmp.or, label %loop, label %exit + +loop: + %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] + %idx = getelementptr inbounds i32, i32* %data, i64 %iv + store i32 1, i32* %idx, align 4 + %iv.next = add nuw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %count + br i1 %exitcond.not, label %exit, label %loop + +exit: + ret void +} + ; Test case for PR47247. 
Both the guard condition and the assume limit the ; max backedge-taken count. diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll @@ -1,83 +1,108 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-NOLSE,CHECK-NOLSE-O1 ; RUN: llc < %s -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -O0 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-NOLSE,CHECK-NOLSE-O0 -; RUN: llc < %s -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -mcpu=apple-a13 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-LSE,CHECK-LSE-O1 -; RUN: llc < %s -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -mcpu=apple-a13 -O0 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-LSE,CHECK-LSE-O0 +; RUN: llc < %s -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -mcpu=apple-a13 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-LSE-O1 +; RUN: llc < %s -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -mcpu=apple-a13 -O0 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-LSE-O0 define i32 @val_compare_and_swap(i32* %p, i32 %cmp, i32 %new) #0 { -; CHECK-NOLSE-LABEL: val_compare_and_swap: -; CHECK-NOLSE-O1: LBB0_1: -; CHECK-NOLSE-O1: ldaxr [[VAL:w[0-9]+]], [x0] -; CHECK-NOLSE-O1: cmp [[VAL]], w1 -; CHECK-NOLSE-O1: b.ne LBB0_4 -; CHECK-NOLSE-O1: stxr [[STATUS:w[0-9]+]], w2, [x0] -; CHECK-NOLSE-O1: cbnz [[STATUS]], LBB0_1 -; CHECK-NOLSE-O1: mov w0, [[VAL]] -; CHECK-NOLSE-O1: ret -; CHECK-NOLSE-O1: LBB0_4: -; CHECK-NOLSE-O1: clrex -; CHECK-NOLSE-O1: mov w0, [[VAL]] -; CHECK-NOLSE-O1: ret - -; CHECK-NOLSE-O0: mov x[[ADDR:[0-9]+]], x0 -; CHECK-NOLSE-O0: LBB0_1: -; 
CHECK-NOLSE-O0: ldaxr w0, [x[[ADDR]]] -; CHECK-NOLSE-O0: cmp w0, w1 -; CHECK-NOLSE-O0: b.ne LBB0_3 -; CHECK-NOLSE-O0: stlxr [[STATUS:w[0-9]+]], w2, [x[[ADDR]]] -; CHECK-NOLSE-O0: cbnz [[STATUS]], LBB0_1 -; CHECK-NOLSE-O0: LBB0_3: -; CHECK-NOLSE-O0: ret - -; CHECK-LSE-LABEL: val_compare_and_swap: -; CHECK-LSE-O1: casa w1, w2, [x0] -; CHECK-LSE-O1: mov x0, x1 - -; CHECK-LSE-O0: mov x[[ADDR:[0-9]+]], x0 -; CHECK-LSE-O0: mov x0, x1 -; CHECK-LSE-O0: casa w0, w2, [x[[ADDR]]] - +; CHECK-NOLSE-O1-LABEL: val_compare_and_swap: +; CHECK-NOLSE-O1: ; %bb.0: +; CHECK-NOLSE-O1-NEXT: LBB0_1: ; %cmpxchg.start +; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: ldaxr w8, [x0] +; CHECK-NOLSE-O1-NEXT: cmp w8, w1 +; CHECK-NOLSE-O1-NEXT: b.ne LBB0_4 +; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.trystore +; CHECK-NOLSE-O1-NEXT: ; in Loop: Header=BB0_1 Depth=1 +; CHECK-NOLSE-O1-NEXT: stxr w9, w2, [x0] +; CHECK-NOLSE-O1-NEXT: cbnz w9, LBB0_1 +; CHECK-NOLSE-O1-NEXT: ; %bb.3: ; %cmpxchg.end +; CHECK-NOLSE-O1-NEXT: mov w0, w8 +; CHECK-NOLSE-O1-NEXT: ret +; CHECK-NOLSE-O1-NEXT: LBB0_4: ; %cmpxchg.nostore +; CHECK-NOLSE-O1-NEXT: clrex +; CHECK-NOLSE-O1-NEXT: mov w0, w8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: val_compare_and_swap: +; CHECK-NOLSE-O0: ; %bb.0: +; CHECK-NOLSE-O0-NEXT: mov x9, x0 +; CHECK-NOLSE-O0-NEXT: LBB0_1: ; =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: ldaxr w0, [x9] +; CHECK-NOLSE-O0-NEXT: cmp w0, w1 +; CHECK-NOLSE-O0-NEXT: b.ne LBB0_3 +; CHECK-NOLSE-O0-NEXT: ; %bb.2: ; in Loop: Header=BB0_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: stlxr w8, w2, [x9] +; CHECK-NOLSE-O0-NEXT: cbnz w8, LBB0_1 +; CHECK-NOLSE-O0-NEXT: LBB0_3: +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LSE-O1-LABEL: val_compare_and_swap: +; CHECK-LSE-O1: ; %bb.0: +; CHECK-LSE-O1-NEXT: casa w1, w2, [x0] +; CHECK-LSE-O1-NEXT: mov x0, x1 +; CHECK-LSE-O1-NEXT: ret +; +; CHECK-LSE-O0-LABEL: val_compare_and_swap: +; CHECK-LSE-O0: ; %bb.0: +; CHECK-LSE-O0-NEXT: mov x8, x0 +; 
CHECK-LSE-O0-NEXT: mov x0, x1 +; CHECK-LSE-O0-NEXT: casa w0, w2, [x8] +; CHECK-LSE-O0-NEXT: ret %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire %val = extractvalue { i32, i1 } %pair, 0 ret i32 %val } define i32 @val_compare_and_swap_from_load(i32* %p, i32 %cmp, i32* %pnew) #0 { -; CHECK-NOLSE-LABEL: val_compare_and_swap_from_load: -; CHECK-NOLSE-O1: ldr [[NEW:w[0-9]+]], [x2] -; CHECK-NOLSE-O1: LBB1_1: -; CHECK-NOLSE-O1: ldaxr [[VAL:w[0-9]+]], [x0] -; CHECK-NOLSE-O1: cmp [[VAL]], w1 -; CHECK-NOLSE-O1: b.ne LBB1_4 -; CHECK-NOLSE-O1: stxr [[STATUS:w[0-9]+]], [[NEW]], [x0] -; CHECK-NOLSE-O1: cbnz [[STATUS]], LBB1_1 -; CHECK-NOLSE-O1: mov w0, [[VAL]] -; CHECK-NOLSE-O1: ret -; CHECK-NOLSE-O1: LBB1_4: -; CHECK-NOLSE-O1: clrex -; CHECK-NOLSE-O1: mov w0, [[VAL]] -; CHECK-NOLSE-O1: ret - -; CHECK-NOLSE-O0: mov x[[ADDR:[0-9]+]], x0 -; CHECK-NOLSE-O0: ldr [[NEW:w[0-9]+]], [x2] -; CHECK-NOLSE-O0: LBB1_1: -; CHECK-NOLSE-O0: ldaxr w0, [x[[ADDR]]] -; CHECK-NOLSE-O0: cmp w0, w1 -; CHECK-NOLSE-O0: b.ne LBB1_3 -; CHECK-NOLSE-O0: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] -; CHECK-NOLSE-O0: cbnz [[STATUS]], LBB1_1 -; CHECK-NOLSE-O0: LBB1_3: -; CHECK-NOLSE-O0: ret - -; CHECK-LSE-LABEL: val_compare_and_swap_from_load: -; CHECK-LSE-O1: ldr [[NEW:w[0-9]+]], [x2] -; CHECK-LSE-O1: casa w1, [[NEW]], [x0] -; CHECK-LSE-O1: mov x0, x1 - -; CHECK-LSE-O0: mov x[[ADDR:[0-9]+]], x0 -; CHECK-LSE-O0: mov x0, x1 -; CHECK-LSE-O0: ldr [[NEW:w[0-9]+]], [x2] -; CHECK-LSE-O0: casa w0, [[NEW]], [x[[ADDR]]] - +; CHECK-NOLSE-O1-LABEL: val_compare_and_swap_from_load: +; CHECK-NOLSE-O1: ; %bb.0: +; CHECK-NOLSE-O1-NEXT: ldr w9, [x2] +; CHECK-NOLSE-O1-NEXT: LBB1_1: ; %cmpxchg.start +; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: ldaxr w8, [x0] +; CHECK-NOLSE-O1-NEXT: cmp w8, w1 +; CHECK-NOLSE-O1-NEXT: b.ne LBB1_4 +; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.trystore +; CHECK-NOLSE-O1-NEXT: ; in Loop: Header=BB1_1 Depth=1 +; CHECK-NOLSE-O1-NEXT: stxr w10, w9, [x0] 
+; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB1_1 +; CHECK-NOLSE-O1-NEXT: ; %bb.3: ; %cmpxchg.end +; CHECK-NOLSE-O1-NEXT: mov w0, w8 +; CHECK-NOLSE-O1-NEXT: ret +; CHECK-NOLSE-O1-NEXT: LBB1_4: ; %cmpxchg.nostore +; CHECK-NOLSE-O1-NEXT: clrex +; CHECK-NOLSE-O1-NEXT: mov w0, w8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: val_compare_and_swap_from_load: +; CHECK-NOLSE-O0: ; %bb.0: +; CHECK-NOLSE-O0-NEXT: mov x9, x0 +; CHECK-NOLSE-O0-NEXT: ldr w10, [x2] +; CHECK-NOLSE-O0-NEXT: LBB1_1: ; =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: ldaxr w0, [x9] +; CHECK-NOLSE-O0-NEXT: cmp w0, w1 +; CHECK-NOLSE-O0-NEXT: b.ne LBB1_3 +; CHECK-NOLSE-O0-NEXT: ; %bb.2: ; in Loop: Header=BB1_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: stlxr w8, w10, [x9] +; CHECK-NOLSE-O0-NEXT: cbnz w8, LBB1_1 +; CHECK-NOLSE-O0-NEXT: LBB1_3: +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LSE-O1-LABEL: val_compare_and_swap_from_load: +; CHECK-LSE-O1: ; %bb.0: +; CHECK-LSE-O1-NEXT: ldr w8, [x2] +; CHECK-LSE-O1-NEXT: casa w1, w8, [x0] +; CHECK-LSE-O1-NEXT: mov x0, x1 +; CHECK-LSE-O1-NEXT: ret +; +; CHECK-LSE-O0-LABEL: val_compare_and_swap_from_load: +; CHECK-LSE-O0: ; %bb.0: +; CHECK-LSE-O0-NEXT: mov x9, x0 +; CHECK-LSE-O0-NEXT: mov x0, x1 +; CHECK-LSE-O0-NEXT: ldr w8, [x2] +; CHECK-LSE-O0-NEXT: casa w0, w8, [x9] +; CHECK-LSE-O0-NEXT: ret %new = load i32, i32* %pnew %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire %val = extractvalue { i32, i1 } %pair, 0 @@ -85,242 +110,538 @@ } define i32 @val_compare_and_swap_rel(i32* %p, i32 %cmp, i32 %new) #0 { -; CHECK-NOLSE-LABEL: val_compare_and_swap_rel: -; CHECK-NOLSE-O1: LBB2_1: -; CHECK-NOLSE-O1: ldaxr [[VAL:w[0-9]+]], [x0] -; CHECK-NOLSE-O1: cmp [[VAL]], w1 -; CHECK-NOLSE-O1: b.ne LBB2_4 -; CHECK-NOLSE-O1: stlxr [[STATUS:w[0-9]+]], w2, [x0] -; CHECK-NOLSE-O1: cbnz [[STATUS]], LBB2_1 -; CHECK-NOLSE-O1: mov w0, [[VAL]] -; CHECK-NOLSE-O1: ret -; CHECK-NOLSE-O1: LBB2_4: -; CHECK-NOLSE-O1: clrex -; CHECK-NOLSE-O1: mov w0, [[VAL]] -; CHECK-NOLSE-O1: ret - -; 
CHECK-NOLSE-O0: mov x[[ADDR:[0-9]+]], x0 -; CHECK-NOLSE-O0: LBB2_1: -; CHECK-NOLSE-O0: ldaxr w0, [x[[ADDR]]] -; CHECK-NOLSE-O0: cmp w0, w1 -; CHECK-NOLSE-O0: b.ne LBB2_3 -; CHECK-NOLSE-O0: stlxr [[STATUS:w[0-9]+]], w2, [x[[ADDR]]] -; CHECK-NOLSE-O0: cbnz [[STATUS]], LBB2_1 -; CHECK-NOLSE-O0: LBB2_3: -; CHECK-NOLSE-O0: ret - -; CHECK-LSE-LABEL: val_compare_and_swap_rel: -; CHECK-LSE-O1: casal w1, w2, [x0] -; CHECK-LSE-O1: mov x0, x1 - -; CHECK-LSE-O0: mov x[[ADDR:[0-9]+]], x0 -; CHECK-LSE-O0: mov x0, x1 -; CHECK-LSE-O0: casal w0, w2, [x[[ADDR]]] - +; CHECK-NOLSE-O1-LABEL: val_compare_and_swap_rel: +; CHECK-NOLSE-O1: ; %bb.0: +; CHECK-NOLSE-O1-NEXT: LBB2_1: ; %cmpxchg.start +; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: ldaxr w8, [x0] +; CHECK-NOLSE-O1-NEXT: cmp w8, w1 +; CHECK-NOLSE-O1-NEXT: b.ne LBB2_4 +; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.trystore +; CHECK-NOLSE-O1-NEXT: ; in Loop: Header=BB2_1 Depth=1 +; CHECK-NOLSE-O1-NEXT: stlxr w9, w2, [x0] +; CHECK-NOLSE-O1-NEXT: cbnz w9, LBB2_1 +; CHECK-NOLSE-O1-NEXT: ; %bb.3: ; %cmpxchg.end +; CHECK-NOLSE-O1-NEXT: mov w0, w8 +; CHECK-NOLSE-O1-NEXT: ret +; CHECK-NOLSE-O1-NEXT: LBB2_4: ; %cmpxchg.nostore +; CHECK-NOLSE-O1-NEXT: clrex +; CHECK-NOLSE-O1-NEXT: mov w0, w8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: val_compare_and_swap_rel: +; CHECK-NOLSE-O0: ; %bb.0: +; CHECK-NOLSE-O0-NEXT: mov x9, x0 +; CHECK-NOLSE-O0-NEXT: LBB2_1: ; =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: ldaxr w0, [x9] +; CHECK-NOLSE-O0-NEXT: cmp w0, w1 +; CHECK-NOLSE-O0-NEXT: b.ne LBB2_3 +; CHECK-NOLSE-O0-NEXT: ; %bb.2: ; in Loop: Header=BB2_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: stlxr w8, w2, [x9] +; CHECK-NOLSE-O0-NEXT: cbnz w8, LBB2_1 +; CHECK-NOLSE-O0-NEXT: LBB2_3: +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LSE-O1-LABEL: val_compare_and_swap_rel: +; CHECK-LSE-O1: ; %bb.0: +; CHECK-LSE-O1-NEXT: casal w1, w2, [x0] +; CHECK-LSE-O1-NEXT: mov x0, x1 +; CHECK-LSE-O1-NEXT: ret +; +; 
CHECK-LSE-O0-LABEL: val_compare_and_swap_rel: +; CHECK-LSE-O0: ; %bb.0: +; CHECK-LSE-O0-NEXT: mov x8, x0 +; CHECK-LSE-O0-NEXT: mov x0, x1 +; CHECK-LSE-O0-NEXT: casal w0, w2, [x8] +; CHECK-LSE-O0-NEXT: ret %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acq_rel monotonic %val = extractvalue { i32, i1 } %pair, 0 ret i32 %val } define i64 @val_compare_and_swap_64(i64* %p, i64 %cmp, i64 %new) #0 { -; CHECK-NOLSE-LABEL: val_compare_and_swap_64: -; CHECK-NOLSE-O1: LBB3_1: -; CHECK-NOLSE-O1: ldxr [[VAL:x[0-9]+]], [x0] -; CHECK-NOLSE-O1: cmp [[VAL]], x1 -; CHECK-NOLSE-O1: b.ne LBB3_4 -; CHECK-NOLSE-O1: stxr [[STATUS:w[0-9]+]], x2, [x0] -; CHECK-NOLSE-O1: cbnz [[STATUS]], LBB3_1 -; CHECK-NOLSE-O1: mov x0, [[VAL]] -; CHECK-NOLSE-O1: ret -; CHECK-NOLSE-O1: LBB3_4: -; CHECK-NOLSE-O1: clrex -; CHECK-NOLSE-O1: mov x0, [[VAL]] -; CHECK-NOLSE-O1: ret - -; CHECK-NOLSE-O0: mov x[[ADDR:[0-9]+]], x0 -; CHECK-NOLSE-O0: LBB3_1: -; CHECK-NOLSE-O0: ldaxr x0, [x[[ADDR]]] -; CHECK-NOLSE-O0: cmp x0, x1 -; CHECK-NOLSE-O0: b.ne LBB3_3 -; CHECK-NOLSE-O0: stlxr [[STATUS:w[0-9]+]], x2, [x[[ADDR]]] -; CHECK-NOLSE-O0: cbnz [[STATUS]], LBB3_1 -; CHECK-NOLSE-O0: LBB3_3: -; CHECK-NOLSE-O0: ret - -; CHECK-LSE-LABEL: val_compare_and_swap_64: -; CHECK-LSE-O1: cas x1, x2, [x0] -; CHECK-LSE-O1: mov x0, x1 - -; CHECK-LSE-O0: mov x[[ADDR:[0-9]+]], x0 -; CHECK-LSE-O0: mov x0, x1 -; CHECK-LSE-O0: cas x0, x2, [x[[ADDR]]] - +; CHECK-NOLSE-O1-LABEL: val_compare_and_swap_64: +; CHECK-NOLSE-O1: ; %bb.0: +; CHECK-NOLSE-O1-NEXT: LBB3_1: ; %cmpxchg.start +; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: ldxr x8, [x0] +; CHECK-NOLSE-O1-NEXT: cmp x8, x1 +; CHECK-NOLSE-O1-NEXT: b.ne LBB3_4 +; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.trystore +; CHECK-NOLSE-O1-NEXT: ; in Loop: Header=BB3_1 Depth=1 +; CHECK-NOLSE-O1-NEXT: stxr w9, x2, [x0] +; CHECK-NOLSE-O1-NEXT: cbnz w9, LBB3_1 +; CHECK-NOLSE-O1-NEXT: ; %bb.3: ; %cmpxchg.end +; CHECK-NOLSE-O1-NEXT: mov x0, x8 +; CHECK-NOLSE-O1-NEXT: ret +; 
CHECK-NOLSE-O1-NEXT: LBB3_4: ; %cmpxchg.nostore +; CHECK-NOLSE-O1-NEXT: clrex +; CHECK-NOLSE-O1-NEXT: mov x0, x8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: val_compare_and_swap_64: +; CHECK-NOLSE-O0: ; %bb.0: +; CHECK-NOLSE-O0-NEXT: mov x9, x0 +; CHECK-NOLSE-O0-NEXT: LBB3_1: ; =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: ldaxr x0, [x9] +; CHECK-NOLSE-O0-NEXT: cmp x0, x1 +; CHECK-NOLSE-O0-NEXT: b.ne LBB3_3 +; CHECK-NOLSE-O0-NEXT: ; %bb.2: ; in Loop: Header=BB3_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: stlxr w8, x2, [x9] +; CHECK-NOLSE-O0-NEXT: cbnz w8, LBB3_1 +; CHECK-NOLSE-O0-NEXT: LBB3_3: +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LSE-O1-LABEL: val_compare_and_swap_64: +; CHECK-LSE-O1: ; %bb.0: +; CHECK-LSE-O1-NEXT: cas x1, x2, [x0] +; CHECK-LSE-O1-NEXT: mov x0, x1 +; CHECK-LSE-O1-NEXT: ret +; +; CHECK-LSE-O0-LABEL: val_compare_and_swap_64: +; CHECK-LSE-O0: ; %bb.0: +; CHECK-LSE-O0-NEXT: mov x8, x0 +; CHECK-LSE-O0-NEXT: mov x0, x1 +; CHECK-LSE-O0-NEXT: cas x0, x2, [x8] +; CHECK-LSE-O0-NEXT: ret %pair = cmpxchg i64* %p, i64 %cmp, i64 %new monotonic monotonic %val = extractvalue { i64, i1 } %pair, 0 ret i64 %val } define i32 @fetch_and_nand(i32* %p) #0 { -; CHECK-NOLSE-LABEL: fetch_and_nand: -; CHECK-NOLSE-O1: LBB4_1: -; CHECK-NOLSE-O1: ldxr [[VAL:w[0-9]+]], [x0] -; CHECK-NOLSE-O1: and [[NEWTMP:w[0-9]+]], [[VAL]], #0x7 -; CHECK-NOLSE-O1: mvn [[NEW:w[0-9]+]], [[NEWTMP]] -; CHECK-NOLSE-O1: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x0] -; CHECK-NOLSE-O1: cbnz [[STATUS]], LBB4_1 -; CHECK-NOLSE-O1: mov w0, [[VAL]] -; CHECK-NOLSE-O1: ret - -; CHECK-NOLSE-O0: ldxr -; CHECK-NOLSE-O0: stlxr - -; CHECK-LSE-LABEL: fetch_and_nand: -; CHECK-LSE-O1: LBB4_1: -; CHECK-LSE-O1: ldxr w[[VAL:[0-9]+]], [x0] -; CHECK-LSE-O1: and [[NEWTMP:w[0-9]+]], w[[VAL]], #0x7 -; CHECK-LSE-O1: mvn [[NEW:w[0-9]+]], [[NEWTMP]] -; CHECK-LSE-O1: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x0] -; CHECK-LSE-O1: cbnz [[STATUS]], LBB4_1 -; CHECK-LSE-O1: mov x0, x[[VAL]] - +; CHECK-NOLSE-O1-LABEL: 
fetch_and_nand: +; CHECK-NOLSE-O1: ; %bb.0: +; CHECK-NOLSE-O1-NEXT: LBB4_1: ; %atomicrmw.start +; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: ldxr w8, [x0] +; CHECK-NOLSE-O1-NEXT: and w9, w8, #0x7 +; CHECK-NOLSE-O1-NEXT: mvn w9, w9 +; CHECK-NOLSE-O1-NEXT: stlxr w10, w9, [x0] +; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB4_1 +; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end +; CHECK-NOLSE-O1-NEXT: mov w0, w8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: fetch_and_nand: +; CHECK-NOLSE-O0: ; %bb.0: +; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; =32 +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: ldr w8, [x0] +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: LBB4_1: ; %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: ; Child Loop BB4_2 Depth 2 +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: and w9, w8, #0x7 +; CHECK-NOLSE-O0-NEXT: mvn w12, w9 +; CHECK-NOLSE-O0-NEXT: LBB4_2: ; %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB4_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2 +; CHECK-NOLSE-O0-NEXT: ldaxr w9, [x11] +; CHECK-NOLSE-O0-NEXT: cmp w9, w8 +; CHECK-NOLSE-O0-NEXT: b.ne LBB4_4 +; CHECK-NOLSE-O0-NEXT: ; %bb.3: ; %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB4_2 Depth=2 +; CHECK-NOLSE-O0-NEXT: stlxr w10, w12, [x11] +; CHECK-NOLSE-O0-NEXT: cbnz w10, LBB4_2 +; CHECK-NOLSE-O0-NEXT: LBB4_4: ; %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB4_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: subs w8, w9, w8 +; CHECK-NOLSE-O0-NEXT: cset w8, eq +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB4_1 +; CHECK-NOLSE-O0-NEXT: ; %bb.5: ; %atomicrmw.end +; 
CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; =32 +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LSE-O1-LABEL: fetch_and_nand: +; CHECK-LSE-O1: ; %bb.0: +; CHECK-LSE-O1-NEXT: LBB4_1: ; %atomicrmw.start +; CHECK-LSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-LSE-O1-NEXT: ldxr w8, [x0] +; CHECK-LSE-O1-NEXT: and w9, w8, #0x7 +; CHECK-LSE-O1-NEXT: mvn w9, w9 +; CHECK-LSE-O1-NEXT: stlxr w10, w9, [x0] +; CHECK-LSE-O1-NEXT: cbnz w10, LBB4_1 +; CHECK-LSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end +; CHECK-LSE-O1-NEXT: mov x0, x8 +; CHECK-LSE-O1-NEXT: ret +; +; CHECK-LSE-O0-LABEL: fetch_and_nand: +; CHECK-LSE-O0: ; %bb.0: +; CHECK-LSE-O0-NEXT: sub sp, sp, #32 ; =32 +; CHECK-LSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill +; CHECK-LSE-O0-NEXT: ldr w8, [x0] +; CHECK-LSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill +; CHECK-LSE-O0-NEXT: LBB4_1: ; %atomicrmw.start +; CHECK-LSE-O0-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-LSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload +; CHECK-LSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload +; CHECK-LSE-O0-NEXT: and w9, w8, #0x7 +; CHECK-LSE-O0-NEXT: mvn w10, w9 +; CHECK-LSE-O0-NEXT: mov x9, x8 +; CHECK-LSE-O0-NEXT: casl w9, w10, [x11] +; CHECK-LSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill +; CHECK-LSE-O0-NEXT: subs w8, w9, w8 +; CHECK-LSE-O0-NEXT: cset w8, eq +; CHECK-LSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill +; CHECK-LSE-O0-NEXT: tbz w8, #0, LBB4_1 +; CHECK-LSE-O0-NEXT: ; %bb.2: ; %atomicrmw.end +; CHECK-LSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-LSE-O0-NEXT: add sp, sp, #32 ; =32 +; CHECK-LSE-O0-NEXT: ret %val = atomicrmw nand i32* %p, i32 7 release ret i32 %val } define i64 @fetch_and_nand_64(i64* %p) #0 { -; CHECK-NOLSE-LABEL: fetch_and_nand_64 -; CHECK-NOLSE-O1: LBB5_1: -; CHECK-NOLSE-O1: ldaxr [[VAL:x[0-9]+]], [x0] -; CHECK-NOLSE-O1: and [[NEWTMP:x[0-9]+]], [[VAL]], #0x7 -; CHECK-NOLSE-O1: mvn [[NEW:x[0-9]+]], 
[[NEWTMP]] -; CHECK-NOLSE-O1: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x0] -; CHECK-NOLSE-O1: cbnz [[STATUS]], LBB5_1 -; CHECK-NOLSE-O1: mov x0, [[VAL]] -; CHECK-NOLSE-O1: ret - -; CHECK-NOLSE-O0: ldaxr -; CHECK-NOLSE-O0: stlxr - -; CHECK-LSE-LABEL: fetch_and_nand_64: -; CHECK-LSE-O1: LBB5_1: -; CHECK-LSE-O1: ldaxr [[VAL:x[0-9]+]], [x0] -; CHECK-LSE-O1: and [[NEWTMP:x[0-9]+]], [[VAL]], #0x7 -; CHECK-LSE-O1: mvn [[NEW:x[0-9]+]], [[NEWTMP]] -; CHECK-LSE-O1: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x0] -; CHECK-LSE-O1: cbnz [[STATUS]], LBB5_1 -; CHECK-LSE-O1: mov x0, [[VAL]] - +; CHECK-NOLSE-O1-LABEL: fetch_and_nand_64: +; CHECK-NOLSE-O1: ; %bb.0: +; CHECK-NOLSE-O1-NEXT: LBB5_1: ; %atomicrmw.start +; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: ldaxr x8, [x0] +; CHECK-NOLSE-O1-NEXT: and x9, x8, #0x7 +; CHECK-NOLSE-O1-NEXT: mvn x9, x9 +; CHECK-NOLSE-O1-NEXT: stlxr w10, x9, [x0] +; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB5_1 +; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end +; CHECK-NOLSE-O1-NEXT: mov x0, x8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: fetch_and_nand_64: +; CHECK-NOLSE-O0: ; %bb.0: +; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; =32 +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: ldr x8, [x0] +; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: LBB5_1: ; %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: ; Child Loop BB5_2 Depth 2 +; CHECK-NOLSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: and x9, x8, #0x7 +; CHECK-NOLSE-O0-NEXT: mvn x12, x9 +; CHECK-NOLSE-O0-NEXT: LBB5_2: ; %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB5_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2 +; CHECK-NOLSE-O0-NEXT: ldaxr x9, [x11] +; CHECK-NOLSE-O0-NEXT: cmp x9, x8 +; CHECK-NOLSE-O0-NEXT: b.ne LBB5_4 +; 
CHECK-NOLSE-O0-NEXT: ; %bb.3: ; %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB5_2 Depth=2 +; CHECK-NOLSE-O0-NEXT: stlxr w10, x12, [x11] +; CHECK-NOLSE-O0-NEXT: cbnz w10, LBB5_2 +; CHECK-NOLSE-O0-NEXT: LBB5_4: ; %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: str x9, [sp, #8] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: subs x8, x9, x8 +; CHECK-NOLSE-O0-NEXT: cset w8, eq +; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB5_1 +; CHECK-NOLSE-O0-NEXT: ; %bb.5: ; %atomicrmw.end +; CHECK-NOLSE-O0-NEXT: ldr x0, [sp, #8] ; 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; =32 +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LSE-O1-LABEL: fetch_and_nand_64: +; CHECK-LSE-O1: ; %bb.0: +; CHECK-LSE-O1-NEXT: LBB5_1: ; %atomicrmw.start +; CHECK-LSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-LSE-O1-NEXT: ldaxr x8, [x0] +; CHECK-LSE-O1-NEXT: and x9, x8, #0x7 +; CHECK-LSE-O1-NEXT: mvn x9, x9 +; CHECK-LSE-O1-NEXT: stlxr w10, x9, [x0] +; CHECK-LSE-O1-NEXT: cbnz w10, LBB5_1 +; CHECK-LSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end +; CHECK-LSE-O1-NEXT: mov x0, x8 +; CHECK-LSE-O1-NEXT: ret +; +; CHECK-LSE-O0-LABEL: fetch_and_nand_64: +; CHECK-LSE-O0: ; %bb.0: +; CHECK-LSE-O0-NEXT: sub sp, sp, #32 ; =32 +; CHECK-LSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill +; CHECK-LSE-O0-NEXT: ldr x8, [x0] +; CHECK-LSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Folded Spill +; CHECK-LSE-O0-NEXT: LBB5_1: ; %atomicrmw.start +; CHECK-LSE-O0-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-LSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Folded Reload +; CHECK-LSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload +; CHECK-LSE-O0-NEXT: and x9, x8, #0x7 +; CHECK-LSE-O0-NEXT: mvn x10, x9 +; CHECK-LSE-O0-NEXT: mov x9, x8 +; CHECK-LSE-O0-NEXT: casal x9, x10, [x11] +; CHECK-LSE-O0-NEXT: str x9, [sp, #8] ; 8-byte Folded Spill +; CHECK-LSE-O0-NEXT: subs x8, x9, x8 +; CHECK-LSE-O0-NEXT: cset w8, eq 
+; CHECK-LSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Folded Spill +; CHECK-LSE-O0-NEXT: tbz w8, #0, LBB5_1 +; CHECK-LSE-O0-NEXT: ; %bb.2: ; %atomicrmw.end +; CHECK-LSE-O0-NEXT: ldr x0, [sp, #8] ; 8-byte Folded Reload +; CHECK-LSE-O0-NEXT: add sp, sp, #32 ; =32 +; CHECK-LSE-O0-NEXT: ret %val = atomicrmw nand i64* %p, i64 7 acq_rel ret i64 %val } define i32 @fetch_and_or(i32* %p) #0 { -; CHECK-NOLSE-LABEL: fetch_and_or: -; CHECK-NOLSE-O1: mov [[FIVE:w[0-9]+]], #5 -; CHECK-NOLSE-O1: LBB6_1: -; CHECK-NOLSE-O1: ldaxr [[VAL:w[0-9]+]], [x0] -; CHECK-NOLSE-O1: orr [[NEW:w[0-9]+]], [[VAL]], [[FIVE]] -; CHECK-NOLSE-O1: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x0] -; CHECK-NOLSE-O1: cbnz [[STATUS]], LBB6_1 -; CHECK-NOLSE-O1: mov w0, [[VAL]] -; CHECK-NOLSE-O1: ret - -; CHECK-NOLSE-O0: ldaxr -; CHECK-NOLSE-O0: stlxr - -; CHECK-LSE-LABEL: fetch_and_or: -; CHECK-LSE: ; %bb.0: -; CHECK-LSE: mov w8, #5 -; CHECK-LSE: ldsetal w8, w0, [x0] -; CHECK-LSE: ret +; CHECK-NOLSE-O1-LABEL: fetch_and_or: +; CHECK-NOLSE-O1: ; %bb.0: +; CHECK-NOLSE-O1-NEXT: mov w9, #5 +; CHECK-NOLSE-O1-NEXT: LBB6_1: ; %atomicrmw.start +; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: ldaxr w8, [x0] +; CHECK-NOLSE-O1-NEXT: orr w10, w8, w9 +; CHECK-NOLSE-O1-NEXT: stlxr w11, w10, [x0] +; CHECK-NOLSE-O1-NEXT: cbnz w11, LBB6_1 +; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end +; CHECK-NOLSE-O1-NEXT: mov w0, w8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: fetch_and_or: +; CHECK-NOLSE-O0: ; %bb.0: +; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; =32 +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: ldr w8, [x0] +; CHECK-NOLSE-O0-NEXT: str w8, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: LBB6_1: ; %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: ; Child Loop BB6_2 Depth 2 +; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded 
Reload +; CHECK-NOLSE-O0-NEXT: mov w9, #5 +; CHECK-NOLSE-O0-NEXT: orr w12, w8, w9 +; CHECK-NOLSE-O0-NEXT: LBB6_2: ; %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB6_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2 +; CHECK-NOLSE-O0-NEXT: ldaxr w9, [x11] +; CHECK-NOLSE-O0-NEXT: cmp w9, w8 +; CHECK-NOLSE-O0-NEXT: b.ne LBB6_4 +; CHECK-NOLSE-O0-NEXT: ; %bb.3: ; %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB6_2 Depth=2 +; CHECK-NOLSE-O0-NEXT: stlxr w10, w12, [x11] +; CHECK-NOLSE-O0-NEXT: cbnz w10, LBB6_2 +; CHECK-NOLSE-O0-NEXT: LBB6_4: ; %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB6_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #12] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: subs w8, w9, w8 +; CHECK-NOLSE-O0-NEXT: cset w8, eq +; CHECK-NOLSE-O0-NEXT: str w9, [sp, #28] ; 4-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB6_1 +; CHECK-NOLSE-O0-NEXT: ; %bb.5: ; %atomicrmw.end +; CHECK-NOLSE-O0-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; =32 +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LSE-O1-LABEL: fetch_and_or: +; CHECK-LSE-O1: ; %bb.0: +; CHECK-LSE-O1-NEXT: mov w8, #5 +; CHECK-LSE-O1-NEXT: ldsetal w8, w0, [x0] +; CHECK-LSE-O1-NEXT: ret +; +; CHECK-LSE-O0-LABEL: fetch_and_or: +; CHECK-LSE-O0: ; %bb.0: +; CHECK-LSE-O0-NEXT: mov w8, #5 +; CHECK-LSE-O0-NEXT: ldsetal w8, w0, [x0] +; CHECK-LSE-O0-NEXT: ret %val = atomicrmw or i32* %p, i32 5 seq_cst ret i32 %val } define i64 @fetch_and_or_64(i64* %p) #0 { -; CHECK-NOLSE-LABEL: fetch_and_or_64: -; CHECK-NOLSE-O1: LBB7_1: -; CHECK-NOLSE-O1: ldxr [[VAL:x[0-9]+]], [x0] -; CHECK-NOLSE-O1: orr [[NEW:x[0-9]+]], [[VAL]], #0x7 -; CHECK-NOLSE-O1: stxr [[STATUS:w[0-9]+]], [[NEW]], [x0] -; CHECK-NOLSE-O1: cbnz [[STATUS]], LBB7_1 -; CHECK-NOLSE-O1: mov x0, [[VAL]] -; CHECK-NOLSE-O1: ret - -; CHECK-NOLSE-O0: ldxr -; CHECK-NOLSE-O0: stxr - -; CHECK-LSE-LABEL: fetch_and_or_64: -; CHECK-LSE: mov w[[SEVEN:[0-9]+]], #7 -; CHECK-LSE: 
ldset x[[SEVEN]], x0, [x0] -; CHECK-LSE: ret +; CHECK-NOLSE-O1-LABEL: fetch_and_or_64: +; CHECK-NOLSE-O1: ; %bb.0: +; CHECK-NOLSE-O1-NEXT: LBB7_1: ; %atomicrmw.start +; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NOLSE-O1-NEXT: ldxr x8, [x0] +; CHECK-NOLSE-O1-NEXT: orr x9, x8, #0x7 +; CHECK-NOLSE-O1-NEXT: stxr w10, x9, [x0] +; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB7_1 +; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end +; CHECK-NOLSE-O1-NEXT: mov x0, x8 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: fetch_and_or_64: +; CHECK-NOLSE-O0: ; %bb.0: +; CHECK-NOLSE-O0-NEXT: sub sp, sp, #32 ; =32 +; CHECK-NOLSE-O0-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: ldr x8, [x0] +; CHECK-NOLSE-O0-NEXT: str x8, [sp, #24] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: LBB7_1: ; %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: ; =>This Loop Header: Depth=1 +; CHECK-NOLSE-O0-NEXT: ; Child Loop BB7_2 Depth 2 +; CHECK-NOLSE-O0-NEXT: ldr x8, [sp, #24] ; 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: orr x12, x8, #0x7 +; CHECK-NOLSE-O0-NEXT: LBB7_2: ; %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB7_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: ; => This Inner Loop Header: Depth=2 +; CHECK-NOLSE-O0-NEXT: ldaxr x9, [x11] +; CHECK-NOLSE-O0-NEXT: cmp x9, x8 +; CHECK-NOLSE-O0-NEXT: b.ne LBB7_4 +; CHECK-NOLSE-O0-NEXT: ; %bb.3: ; %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB7_2 Depth=2 +; CHECK-NOLSE-O0-NEXT: stlxr w10, x12, [x11] +; CHECK-NOLSE-O0-NEXT: cbnz w10, LBB7_2 +; CHECK-NOLSE-O0-NEXT: LBB7_4: ; %atomicrmw.start +; CHECK-NOLSE-O0-NEXT: ; in Loop: Header=BB7_1 Depth=1 +; CHECK-NOLSE-O0-NEXT: str x9, [sp, #8] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: subs x8, x9, x8 +; CHECK-NOLSE-O0-NEXT: cset w8, eq +; CHECK-NOLSE-O0-NEXT: str x9, [sp, #24] ; 8-byte Folded Spill +; CHECK-NOLSE-O0-NEXT: tbz w8, #0, LBB7_1 +; CHECK-NOLSE-O0-NEXT: ; %bb.5: ; %atomicrmw.end +; 
CHECK-NOLSE-O0-NEXT: ldr x0, [sp, #8] ; 8-byte Folded Reload +; CHECK-NOLSE-O0-NEXT: add sp, sp, #32 ; =32 +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LSE-O1-LABEL: fetch_and_or_64: +; CHECK-LSE-O1: ; %bb.0: +; CHECK-LSE-O1-NEXT: mov w8, #7 +; CHECK-LSE-O1-NEXT: ldset x8, x0, [x0] +; CHECK-LSE-O1-NEXT: ret +; +; CHECK-LSE-O0-LABEL: fetch_and_or_64: +; CHECK-LSE-O0: ; %bb.0: +; CHECK-LSE-O0-NEXT: mov w8, #7 +; CHECK-LSE-O0-NEXT: ; kill: def $x8 killed $w8 +; CHECK-LSE-O0-NEXT: ldset x8, x0, [x0] +; CHECK-LSE-O0-NEXT: ret %val = atomicrmw or i64* %p, i64 7 monotonic ret i64 %val } define void @acquire_fence() #0 { ; CHECK-NOLSE-LABEL: acquire_fence: -; CHECK-NOLSE: dmb ish -; CHECK-NOLSE: ret +; CHECK-NOLSE: ; %bb.0: +; CHECK-NOLSE-NEXT: dmb ish +; CHECK-NOLSE-NEXT: ret ; -; CHECK-LSE-LABEL: acquire_fence: -; CHECK-LSE: dmb ish -; CHECK-LSE: ret +; CHECK-LSE-O1-LABEL: acquire_fence: +; CHECK-LSE-O1: ; %bb.0: +; CHECK-LSE-O1-NEXT: dmb ish +; CHECK-LSE-O1-NEXT: ret +; +; CHECK-LSE-O0-LABEL: acquire_fence: +; CHECK-LSE-O0: ; %bb.0: +; CHECK-LSE-O0-NEXT: dmb ish +; CHECK-LSE-O0-NEXT: ret fence acquire ret void } define void @release_fence() #0 { ; CHECK-NOLSE-LABEL: release_fence: -; CHECK-NOLSE: dmb ish -; CHECK-NOLSE: ret +; CHECK-NOLSE: ; %bb.0: +; CHECK-NOLSE-NEXT: dmb ish +; CHECK-NOLSE-NEXT: ret +; +; CHECK-LSE-O1-LABEL: release_fence: +; CHECK-LSE-O1: ; %bb.0: +; CHECK-LSE-O1-NEXT: dmb ish +; CHECK-LSE-O1-NEXT: ret ; -; CHECK-LSE-LABEL: release_fence: -; CHECK-LSE: ; %bb.0: -; CHECK-LSE: dmb ish -; CHECK-LSE: ret +; CHECK-LSE-O0-LABEL: release_fence: +; CHECK-LSE-O0: ; %bb.0: +; CHECK-LSE-O0-NEXT: dmb ish +; CHECK-LSE-O0-NEXT: ret fence release ret void } define void @seq_cst_fence() #0 { -; CHECK-LABEL: seq_cst_fence: -; CHECK-NOLSE: dmb ish -; CHECK-NOLSE: ret -; -; CHECK-LSE-LABEL: seq_cst_fence: -; CHECK-LSE: ; %bb.0: -; CHECK-LSE: dmb ish -; CHECK-LSE: ret +; CHECK-NOLSE-LABEL: seq_cst_fence: +; CHECK-NOLSE: ; %bb.0: +; CHECK-NOLSE-NEXT: dmb ish +; 
CHECK-NOLSE-NEXT: ret +; +; CHECK-LSE-O1-LABEL: seq_cst_fence: +; CHECK-LSE-O1: ; %bb.0: +; CHECK-LSE-O1-NEXT: dmb ish +; CHECK-LSE-O1-NEXT: ret +; +; CHECK-LSE-O0-LABEL: seq_cst_fence: +; CHECK-LSE-O0: ; %bb.0: +; CHECK-LSE-O0-NEXT: dmb ish +; CHECK-LSE-O0-NEXT: ret fence seq_cst ret void } define i32 @atomic_load(i32* %p) #0 { -; CHECK-LABEL: atomic_load: -; CHECK-NOLSE: ldar w0, [x0] -; CHECK-NOLSE: ret +; CHECK-NOLSE-LABEL: atomic_load: +; CHECK-NOLSE: ; %bb.0: +; CHECK-NOLSE-NEXT: ldar w0, [x0] +; CHECK-NOLSE-NEXT: ret ; -; CHECK-LSE-LABEL: atomic_load: -; CHECK-LSE: ldar w0, [x0] -; CHECK-LSE: ret +; CHECK-LSE-O1-LABEL: atomic_load: +; CHECK-LSE-O1: ; %bb.0: +; CHECK-LSE-O1-NEXT: ldar w0, [x0] +; CHECK-LSE-O1-NEXT: ret +; +; CHECK-LSE-O0-LABEL: atomic_load: +; CHECK-LSE-O0: ; %bb.0: +; CHECK-LSE-O0-NEXT: ldar w0, [x0] +; CHECK-LSE-O0-NEXT: ret %r = load atomic i32, i32* %p seq_cst, align 4 ret i32 %r } define i8 @atomic_load_relaxed_8(i8* %p, i32 %off32) #0 { -; CHECK-NOLSE-LABEL: atomic_load_relaxed_8: -; CHECK-NOLSE-O1: ldrb {{w[0-9]+}}, [x0, #4095] -; CHECK-NOLSE-O1: ldrb {{w[0-9]+}}, [x0, w1, sxtw] -; CHECK-NOLSE-O1: ldurb {{w[0-9]+}}, [x0, #-256] -; CHECK-NOLSE-O1: add x[[ADDR:[0-9]+]], x0, #291, lsl #12 -; CHECK-NOLSE-O1: ldrb {{w[0-9]+}}, [x[[ADDR]]] - -; CHECK-LSE: ldrb -; CHECK-LSE: ldrb -; CHECK-LSE: ld{{u?}}rb -; CHECK-LSE: ldrb - +; CHECK-NOLSE-O1-LABEL: atomic_load_relaxed_8: +; CHECK-NOLSE-O1: ; %bb.0: +; CHECK-NOLSE-O1-NEXT: ldrb w8, [x0, #4095] +; CHECK-NOLSE-O1-NEXT: ldrb w9, [x0, w1, sxtw] +; CHECK-NOLSE-O1-NEXT: ldurb w10, [x0, #-256] +; CHECK-NOLSE-O1-NEXT: add x11, x0, #291, lsl #12 ; =1191936 +; CHECK-NOLSE-O1-NEXT: ldrb w11, [x11] +; CHECK-NOLSE-O1-NEXT: add w8, w8, w9 +; CHECK-NOLSE-O1-NEXT: add w8, w8, w10 +; CHECK-NOLSE-O1-NEXT: add w0, w8, w11 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomic_load_relaxed_8: +; CHECK-NOLSE-O0: ; %bb.0: +; CHECK-NOLSE-O0-NEXT: ldrb w9, [x0, #4095] +; CHECK-NOLSE-O0-NEXT: add x8, x0, w1, 
sxtw +; CHECK-NOLSE-O0-NEXT: ldrb w8, [x8] +; CHECK-NOLSE-O0-NEXT: add w8, w8, w9, uxtb +; CHECK-NOLSE-O0-NEXT: subs x9, x0, #256 ; =256 +; CHECK-NOLSE-O0-NEXT: ldrb w9, [x9] +; CHECK-NOLSE-O0-NEXT: add w8, w8, w9, uxtb +; CHECK-NOLSE-O0-NEXT: add x9, x0, #291, lsl #12 ; =1191936 +; CHECK-NOLSE-O0-NEXT: ldrb w9, [x9] +; CHECK-NOLSE-O0-NEXT: add w0, w8, w9, uxtb +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LSE-O1-LABEL: atomic_load_relaxed_8: +; CHECK-LSE-O1: ; %bb.0: +; CHECK-LSE-O1-NEXT: ldrb w8, [x0, #4095] +; CHECK-LSE-O1-NEXT: ldrb w9, [x0, w1, sxtw] +; CHECK-LSE-O1-NEXT: add w8, w8, w9 +; CHECK-LSE-O1-NEXT: ldurb w9, [x0, #-256] +; CHECK-LSE-O1-NEXT: add w8, w8, w9 +; CHECK-LSE-O1-NEXT: add x9, x0, #291, lsl #12 ; =1191936 +; CHECK-LSE-O1-NEXT: ldrb w9, [x9] +; CHECK-LSE-O1-NEXT: add w0, w8, w9 +; CHECK-LSE-O1-NEXT: ret +; +; CHECK-LSE-O0-LABEL: atomic_load_relaxed_8: +; CHECK-LSE-O0: ; %bb.0: +; CHECK-LSE-O0-NEXT: ldrb w9, [x0, #4095] +; CHECK-LSE-O0-NEXT: add x8, x0, w1, sxtw +; CHECK-LSE-O0-NEXT: ldrb w8, [x8] +; CHECK-LSE-O0-NEXT: add w8, w8, w9, uxtb +; CHECK-LSE-O0-NEXT: subs x9, x0, #256 ; =256 +; CHECK-LSE-O0-NEXT: ldrb w9, [x9] +; CHECK-LSE-O0-NEXT: add w8, w8, w9, uxtb +; CHECK-LSE-O0-NEXT: add x9, x0, #291, lsl #12 ; =1191936 +; CHECK-LSE-O0-NEXT: ldrb w9, [x9] +; CHECK-LSE-O0-NEXT: add w0, w8, w9, uxtb +; CHECK-LSE-O0-NEXT: ret %ptr_unsigned = getelementptr i8, i8* %p, i32 4095 %val_unsigned = load atomic i8, i8* %ptr_unsigned monotonic, align 1 @@ -340,18 +661,57 @@ } define i16 @atomic_load_relaxed_16(i16* %p, i32 %off32) #0 { -; CHECK-NOLSE-LABEL: atomic_load_relaxed_16: -; CHECK-NOLSE-O1: ldrh {{w[0-9]+}}, [x0, #8190] -; CHECK-NOLSE-O1: ldrh {{w[0-9]+}}, [x0, w1, sxtw #1] -; CHECK-NOLSE-O1: ldurh {{w[0-9]+}}, [x0, #-256] -; CHECK-NOLSE-O1: add x[[ADDR:[0-9]+]], x0, #291, lsl #12 ; =1191936 -; CHECK-NOLSE-O1: ldrh {{w[0-9]+}}, [x[[ADDR]]] - -; CHECK-LSE: ldrh -; CHECK-LSE: ldrh -; CHECK-LSE: ld{{u?}}rh -; CHECK-LSE: ldrh - +; CHECK-NOLSE-O1-LABEL: 
atomic_load_relaxed_16: +; CHECK-NOLSE-O1: ; %bb.0: +; CHECK-NOLSE-O1-NEXT: ldrh w8, [x0, #8190] +; CHECK-NOLSE-O1-NEXT: ldrh w9, [x0, w1, sxtw #1] +; CHECK-NOLSE-O1-NEXT: ldurh w10, [x0, #-256] +; CHECK-NOLSE-O1-NEXT: add x11, x0, #291, lsl #12 ; =1191936 +; CHECK-NOLSE-O1-NEXT: ldrh w11, [x11] +; CHECK-NOLSE-O1-NEXT: add w8, w8, w9 +; CHECK-NOLSE-O1-NEXT: add w8, w8, w10 +; CHECK-NOLSE-O1-NEXT: add w0, w8, w11 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomic_load_relaxed_16: +; CHECK-NOLSE-O0: ; %bb.0: +; CHECK-NOLSE-O0-NEXT: ldrh w9, [x0, #8190] +; CHECK-NOLSE-O0-NEXT: add x8, x0, w1, sxtw #1 +; CHECK-NOLSE-O0-NEXT: ldrh w8, [x8] +; CHECK-NOLSE-O0-NEXT: add w8, w8, w9, uxth +; CHECK-NOLSE-O0-NEXT: subs x9, x0, #256 ; =256 +; CHECK-NOLSE-O0-NEXT: ldrh w9, [x9] +; CHECK-NOLSE-O0-NEXT: add w8, w8, w9, uxth +; CHECK-NOLSE-O0-NEXT: add x9, x0, #291, lsl #12 ; =1191936 +; CHECK-NOLSE-O0-NEXT: ldrh w9, [x9] +; CHECK-NOLSE-O0-NEXT: add w0, w8, w9, uxth +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LSE-O1-LABEL: atomic_load_relaxed_16: +; CHECK-LSE-O1: ; %bb.0: +; CHECK-LSE-O1-NEXT: ldrh w8, [x0, #8190] +; CHECK-LSE-O1-NEXT: ldrh w9, [x0, w1, sxtw #1] +; CHECK-LSE-O1-NEXT: add w8, w8, w9 +; CHECK-LSE-O1-NEXT: ldurh w9, [x0, #-256] +; CHECK-LSE-O1-NEXT: add w8, w8, w9 +; CHECK-LSE-O1-NEXT: add x9, x0, #291, lsl #12 ; =1191936 +; CHECK-LSE-O1-NEXT: ldrh w9, [x9] +; CHECK-LSE-O1-NEXT: add w0, w8, w9 +; CHECK-LSE-O1-NEXT: ret +; +; CHECK-LSE-O0-LABEL: atomic_load_relaxed_16: +; CHECK-LSE-O0: ; %bb.0: +; CHECK-LSE-O0-NEXT: ldrh w9, [x0, #8190] +; CHECK-LSE-O0-NEXT: add x8, x0, w1, sxtw #1 +; CHECK-LSE-O0-NEXT: ldrh w8, [x8] +; CHECK-LSE-O0-NEXT: add w8, w8, w9, uxth +; CHECK-LSE-O0-NEXT: subs x9, x0, #256 ; =256 +; CHECK-LSE-O0-NEXT: ldrh w9, [x9] +; CHECK-LSE-O0-NEXT: add w8, w8, w9, uxth +; CHECK-LSE-O0-NEXT: add x9, x0, #291, lsl #12 ; =1191936 +; CHECK-LSE-O0-NEXT: ldrh w9, [x9] +; CHECK-LSE-O0-NEXT: add w0, w8, w9, uxth +; CHECK-LSE-O0-NEXT: ret %ptr_unsigned = 
getelementptr i16, i16* %p, i32 4095 %val_unsigned = load atomic i16, i16* %ptr_unsigned monotonic, align 2 @@ -371,20 +731,53 @@ } define i32 @atomic_load_relaxed_32(i32* %p, i32 %off32) #0 { -; CHECK-NOLSE-LABEL: atomic_load_relaxed_32: -; CHECK-NOLSE-O1: ldr {{w[0-9]+}}, [x0, #16380] -; CHECK-NOLSE-O1: ldr {{w[0-9]+}}, [x0, w1, sxtw #2] -; CHECK-NOLSE-O1: ldur {{w[0-9]+}}, [x0, #-256] -; CHECK-NOLSE-O1: add x[[ADDR:[0-9]+]], x0, #291, lsl #12 ; =1191936 -; CHECK-NOLSE-O1: ldr {{w[0-9]+}}, [x[[ADDR]]] - -; CHECK-LSE-LABEL: atomic_load_relaxed_32: -; CHECK-LSE: ldr {{w[0-9]+}}, [x0, #16380] -; CHECK-LSE: ldr {{w[0-9]+}}, [x0, w1, sxtw #2] -; CHECK-LSE: ldur {{w[0-9]+}}, [x0, #-256] -; CHECK-LSE: add x[[ADDR:[0-9]+]], x0, #291, lsl #12 ; =1191936 -; CHECK-LSE: ldr {{w[0-9]+}}, [x[[ADDR]]] - +; CHECK-NOLSE-O1-LABEL: atomic_load_relaxed_32: +; CHECK-NOLSE-O1: ; %bb.0: +; CHECK-NOLSE-O1-NEXT: ldr w8, [x0, #16380] +; CHECK-NOLSE-O1-NEXT: ldr w9, [x0, w1, sxtw #2] +; CHECK-NOLSE-O1-NEXT: ldur w10, [x0, #-256] +; CHECK-NOLSE-O1-NEXT: add x11, x0, #291, lsl #12 ; =1191936 +; CHECK-NOLSE-O1-NEXT: ldr w11, [x11] +; CHECK-NOLSE-O1-NEXT: add w8, w8, w9 +; CHECK-NOLSE-O1-NEXT: add w8, w8, w10 +; CHECK-NOLSE-O1-NEXT: add w0, w8, w11 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomic_load_relaxed_32: +; CHECK-NOLSE-O0: ; %bb.0: +; CHECK-NOLSE-O0-NEXT: ldr w8, [x0, #16380] +; CHECK-NOLSE-O0-NEXT: ldr w9, [x0, w1, sxtw #2] +; CHECK-NOLSE-O0-NEXT: add w8, w8, w9 +; CHECK-NOLSE-O0-NEXT: ldur w9, [x0, #-256] +; CHECK-NOLSE-O0-NEXT: add w8, w8, w9 +; CHECK-NOLSE-O0-NEXT: add x9, x0, #291, lsl #12 ; =1191936 +; CHECK-NOLSE-O0-NEXT: ldr w9, [x9] +; CHECK-NOLSE-O0-NEXT: add w0, w8, w9 +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LSE-O1-LABEL: atomic_load_relaxed_32: +; CHECK-LSE-O1: ; %bb.0: +; CHECK-LSE-O1-NEXT: ldr w8, [x0, #16380] +; CHECK-LSE-O1-NEXT: ldr w9, [x0, w1, sxtw #2] +; CHECK-LSE-O1-NEXT: add w8, w8, w9 +; CHECK-LSE-O1-NEXT: ldur w9, [x0, #-256] +; CHECK-LSE-O1-NEXT: 
add w8, w8, w9 +; CHECK-LSE-O1-NEXT: add x9, x0, #291, lsl #12 ; =1191936 +; CHECK-LSE-O1-NEXT: ldr w9, [x9] +; CHECK-LSE-O1-NEXT: add w0, w8, w9 +; CHECK-LSE-O1-NEXT: ret +; +; CHECK-LSE-O0-LABEL: atomic_load_relaxed_32: +; CHECK-LSE-O0: ; %bb.0: +; CHECK-LSE-O0-NEXT: ldr w8, [x0, #16380] +; CHECK-LSE-O0-NEXT: ldr w9, [x0, w1, sxtw #2] +; CHECK-LSE-O0-NEXT: add w8, w8, w9 +; CHECK-LSE-O0-NEXT: ldur w9, [x0, #-256] +; CHECK-LSE-O0-NEXT: add w8, w8, w9 +; CHECK-LSE-O0-NEXT: add x9, x0, #291, lsl #12 ; =1191936 +; CHECK-LSE-O0-NEXT: ldr w9, [x9] +; CHECK-LSE-O0-NEXT: add w0, w8, w9 +; CHECK-LSE-O0-NEXT: ret %ptr_unsigned = getelementptr i32, i32* %p, i32 4095 %val_unsigned = load atomic i32, i32* %ptr_unsigned monotonic, align 4 @@ -404,20 +797,53 @@ } define i64 @atomic_load_relaxed_64(i64* %p, i32 %off32) #0 { -; CHECK-NOLSE-LABEL: atomic_load_relaxed_64: -; CHECK-NOLSE-O1: ldr {{x[0-9]+}}, [x0, #32760] -; CHECK-NOLSE-O1: ldr {{x[0-9]+}}, [x0, w1, sxtw #3] -; CHECK-NOLSE-O1: ldur {{x[0-9]+}}, [x0, #-256] -; CHECK-NOLSE-O1: add x[[ADDR:[0-9]+]], x0, #291, lsl #12 -; CHECK-NOLSE-O1: ldr {{x[0-9]+}}, [x[[ADDR]]] - -; CHECK-LSE-LABEL: atomic_load_relaxed_64: -; CHECK-LSE: ldr {{x[0-9]+}}, [x0, #32760] -; CHECK-LSE: ldr {{x[0-9]+}}, [x0, w1, sxtw #3] -; CHECK-LSE: ldur {{x[0-9]+}}, [x0, #-256] -; CHECK-LSE: add x[[ADDR:[0-9]+]], x0, #291, lsl #12 -; CHECK-LSE: ldr {{x[0-9]+}}, [x[[ADDR]]] - +; CHECK-NOLSE-O1-LABEL: atomic_load_relaxed_64: +; CHECK-NOLSE-O1: ; %bb.0: +; CHECK-NOLSE-O1-NEXT: ldr x8, [x0, #32760] +; CHECK-NOLSE-O1-NEXT: ldr x9, [x0, w1, sxtw #3] +; CHECK-NOLSE-O1-NEXT: ldur x10, [x0, #-256] +; CHECK-NOLSE-O1-NEXT: add x11, x0, #291, lsl #12 ; =1191936 +; CHECK-NOLSE-O1-NEXT: ldr x11, [x11] +; CHECK-NOLSE-O1-NEXT: add x8, x8, x9 +; CHECK-NOLSE-O1-NEXT: add x8, x8, x10 +; CHECK-NOLSE-O1-NEXT: add x0, x8, x11 +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomic_load_relaxed_64: +; CHECK-NOLSE-O0: ; %bb.0: +; CHECK-NOLSE-O0-NEXT: ldr x8, [x0, #32760] 
+; CHECK-NOLSE-O0-NEXT: ldr x9, [x0, w1, sxtw #3] +; CHECK-NOLSE-O0-NEXT: add x8, x8, x9 +; CHECK-NOLSE-O0-NEXT: ldur x9, [x0, #-256] +; CHECK-NOLSE-O0-NEXT: add x8, x8, x9 +; CHECK-NOLSE-O0-NEXT: add x9, x0, #291, lsl #12 ; =1191936 +; CHECK-NOLSE-O0-NEXT: ldr x9, [x9] +; CHECK-NOLSE-O0-NEXT: add x0, x8, x9 +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LSE-O1-LABEL: atomic_load_relaxed_64: +; CHECK-LSE-O1: ; %bb.0: +; CHECK-LSE-O1-NEXT: ldr x8, [x0, #32760] +; CHECK-LSE-O1-NEXT: ldr x9, [x0, w1, sxtw #3] +; CHECK-LSE-O1-NEXT: add x8, x8, x9 +; CHECK-LSE-O1-NEXT: ldur x9, [x0, #-256] +; CHECK-LSE-O1-NEXT: add x8, x8, x9 +; CHECK-LSE-O1-NEXT: add x9, x0, #291, lsl #12 ; =1191936 +; CHECK-LSE-O1-NEXT: ldr x9, [x9] +; CHECK-LSE-O1-NEXT: add x0, x8, x9 +; CHECK-LSE-O1-NEXT: ret +; +; CHECK-LSE-O0-LABEL: atomic_load_relaxed_64: +; CHECK-LSE-O0: ; %bb.0: +; CHECK-LSE-O0-NEXT: ldr x8, [x0, #32760] +; CHECK-LSE-O0-NEXT: ldr x9, [x0, w1, sxtw #3] +; CHECK-LSE-O0-NEXT: add x8, x8, x9 +; CHECK-LSE-O0-NEXT: ldur x9, [x0, #-256] +; CHECK-LSE-O0-NEXT: add x8, x8, x9 +; CHECK-LSE-O0-NEXT: add x9, x0, #291, lsl #12 ; =1191936 +; CHECK-LSE-O0-NEXT: ldr x9, [x9] +; CHECK-LSE-O0-NEXT: add x0, x8, x9 +; CHECK-LSE-O0-NEXT: ret %ptr_unsigned = getelementptr i64, i64* %p, i32 4095 %val_unsigned = load atomic i64, i64* %ptr_unsigned monotonic, align 8 @@ -439,25 +865,70 @@ define void @atomc_store(i32* %p) #0 { ; CHECK-NOLSE-LABEL: atomc_store: -; CHECK-NOLSE: mov w8, #4 -; CHECK-NOLSE: stlr w8, [x0] -; CHECK-NOLSE: ret -; -; CHECK-LSE-LABEL: atomc_store: -; CHECK-LSE: mov [[FOUR:w[0-9]+]], #4 -; CHECK-LSE: stlr [[FOUR]], [x0] -; CHECK-LSE: ret +; CHECK-NOLSE: ; %bb.0: +; CHECK-NOLSE-NEXT: mov w8, #4 +; CHECK-NOLSE-NEXT: stlr w8, [x0] +; CHECK-NOLSE-NEXT: ret +; +; CHECK-LSE-O1-LABEL: atomc_store: +; CHECK-LSE-O1: ; %bb.0: +; CHECK-LSE-O1-NEXT: mov w8, #4 +; CHECK-LSE-O1-NEXT: stlr w8, [x0] +; CHECK-LSE-O1-NEXT: ret +; +; CHECK-LSE-O0-LABEL: atomc_store: +; CHECK-LSE-O0: ; %bb.0: +; 
CHECK-LSE-O0-NEXT: mov w8, #4 +; CHECK-LSE-O0-NEXT: stlr w8, [x0] +; CHECK-LSE-O0-NEXT: ret store atomic i32 4, i32* %p seq_cst, align 4 ret void } define void @atomic_store_relaxed_8(i8* %p, i32 %off32, i8 %val) #0 { -; CHECK-NOLSE-LABEL: atomic_store_relaxed_8: -; CHECK-NOLSE: strb w2 -; CHECK-NOLSE: strb w2 -; CHECK-NOLSE: strb w2 -; CHECK-NOLSE: strb w2 - +; CHECK-NOLSE-O1-LABEL: atomic_store_relaxed_8: +; CHECK-NOLSE-O1: ; %bb.0: +; CHECK-NOLSE-O1-NEXT: add x8, x0, w1, sxtw +; CHECK-NOLSE-O1-NEXT: sub x9, x0, #256 ; =256 +; CHECK-NOLSE-O1-NEXT: add x10, x0, #291, lsl #12 ; =1191936 +; CHECK-NOLSE-O1-NEXT: strb w2, [x0, #4095] +; CHECK-NOLSE-O1-NEXT: strb w2, [x8] +; CHECK-NOLSE-O1-NEXT: strb w2, [x9] +; CHECK-NOLSE-O1-NEXT: strb w2, [x10] +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomic_store_relaxed_8: +; CHECK-NOLSE-O0: ; %bb.0: +; CHECK-NOLSE-O0-NEXT: strb w2, [x0, #4095] +; CHECK-NOLSE-O0-NEXT: add x8, x0, w1, sxtw +; CHECK-NOLSE-O0-NEXT: strb w2, [x8] +; CHECK-NOLSE-O0-NEXT: subs x8, x0, #256 ; =256 +; CHECK-NOLSE-O0-NEXT: strb w2, [x8] +; CHECK-NOLSE-O0-NEXT: add x8, x0, #291, lsl #12 ; =1191936 +; CHECK-NOLSE-O0-NEXT: strb w2, [x8] +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LSE-O1-LABEL: atomic_store_relaxed_8: +; CHECK-LSE-O1: ; %bb.0: +; CHECK-LSE-O1-NEXT: strb w2, [x0, #4095] +; CHECK-LSE-O1-NEXT: add x8, x0, w1, sxtw +; CHECK-LSE-O1-NEXT: strb w2, [x8] +; CHECK-LSE-O1-NEXT: sub x8, x0, #256 ; =256 +; CHECK-LSE-O1-NEXT: strb w2, [x8] +; CHECK-LSE-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936 +; CHECK-LSE-O1-NEXT: strb w2, [x8] +; CHECK-LSE-O1-NEXT: ret +; +; CHECK-LSE-O0-LABEL: atomic_store_relaxed_8: +; CHECK-LSE-O0: ; %bb.0: +; CHECK-LSE-O0-NEXT: strb w2, [x0, #4095] +; CHECK-LSE-O0-NEXT: add x8, x0, w1, sxtw +; CHECK-LSE-O0-NEXT: strb w2, [x8] +; CHECK-LSE-O0-NEXT: subs x8, x0, #256 ; =256 +; CHECK-LSE-O0-NEXT: strb w2, [x8] +; CHECK-LSE-O0-NEXT: add x8, x0, #291, lsl #12 ; =1191936 +; CHECK-LSE-O0-NEXT: strb w2, [x8] +; 
CHECK-LSE-O0-NEXT: ret %ptr_unsigned = getelementptr i8, i8* %p, i32 4095 store atomic i8 %val, i8* %ptr_unsigned monotonic, align 1 @@ -474,17 +945,49 @@ } define void @atomic_store_relaxed_16(i16* %p, i32 %off32, i16 %val) #0 { -; CHECK-NOLSE-LABEL: atomic_store_relaxed_16: -; CHECK-NOLSE: strh w2 -; CHECK-NOLSE: strh w2 -; CHECK-NOLSE: strh w2 -; CHECK-NOLSE: strh w2 - -; CHECK-LSE: strh w2 -; CHECK-LSE: strh w2 -; CHECK-LSE: strh w2 -; CHECK-LSE: strh w2 - +; CHECK-NOLSE-O1-LABEL: atomic_store_relaxed_16: +; CHECK-NOLSE-O1: ; %bb.0: +; CHECK-NOLSE-O1-NEXT: add x8, x0, w1, sxtw #1 +; CHECK-NOLSE-O1-NEXT: sub x9, x0, #256 ; =256 +; CHECK-NOLSE-O1-NEXT: add x10, x0, #291, lsl #12 ; =1191936 +; CHECK-NOLSE-O1-NEXT: strh w2, [x0, #8190] +; CHECK-NOLSE-O1-NEXT: strh w2, [x8] +; CHECK-NOLSE-O1-NEXT: strh w2, [x9] +; CHECK-NOLSE-O1-NEXT: strh w2, [x10] +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomic_store_relaxed_16: +; CHECK-NOLSE-O0: ; %bb.0: +; CHECK-NOLSE-O0-NEXT: strh w2, [x0, #8190] +; CHECK-NOLSE-O0-NEXT: add x8, x0, w1, sxtw #1 +; CHECK-NOLSE-O0-NEXT: strh w2, [x8] +; CHECK-NOLSE-O0-NEXT: subs x8, x0, #256 ; =256 +; CHECK-NOLSE-O0-NEXT: strh w2, [x8] +; CHECK-NOLSE-O0-NEXT: add x8, x0, #291, lsl #12 ; =1191936 +; CHECK-NOLSE-O0-NEXT: strh w2, [x8] +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LSE-O1-LABEL: atomic_store_relaxed_16: +; CHECK-LSE-O1: ; %bb.0: +; CHECK-LSE-O1-NEXT: strh w2, [x0, #8190] +; CHECK-LSE-O1-NEXT: add x8, x0, w1, sxtw #1 +; CHECK-LSE-O1-NEXT: strh w2, [x8] +; CHECK-LSE-O1-NEXT: sub x8, x0, #256 ; =256 +; CHECK-LSE-O1-NEXT: strh w2, [x8] +; CHECK-LSE-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936 +; CHECK-LSE-O1-NEXT: strh w2, [x8] +; CHECK-LSE-O1-NEXT: ret +; +; CHECK-LSE-O0-LABEL: atomic_store_relaxed_16: +; CHECK-LSE-O0: ; %bb.0: +; CHECK-LSE-O0-NEXT: strh w2, [x0, #8190] +; CHECK-LSE-O0-NEXT: add x8, x0, w1, sxtw #1 +; CHECK-LSE-O0-NEXT: strh w2, [x8] +; CHECK-LSE-O0-NEXT: subs x8, x0, #256 ; =256 +; CHECK-LSE-O0-NEXT: strh w2, 
[x8] +; CHECK-LSE-O0-NEXT: add x8, x0, #291, lsl #12 ; =1191936 +; CHECK-LSE-O0-NEXT: strh w2, [x8] +; CHECK-LSE-O0-NEXT: ret %ptr_unsigned = getelementptr i16, i16* %p, i32 4095 store atomic i16 %val, i16* %ptr_unsigned monotonic, align 2 @@ -501,17 +1004,41 @@ } define void @atomic_store_relaxed_32(i32* %p, i32 %off32, i32 %val) #0 { -; CHECK-NOLSE-LABEL: atomic_store_relaxed_32: -; CHECK-NOLSE: str w2 -; CHECK-NOLSE: str w2 -; CHECK-NOLSE: stur w2 -; CHECK-NOLSE: str w2 - -; CHECK-LSE-LABEL: atomic_store_relaxed_32: -; CHECK-LSE: str w2 -; CHECK-LSE: str w2 -; CHECK-LSE: stur w2 -; CHECK-LSE: str w2 +; CHECK-NOLSE-O1-LABEL: atomic_store_relaxed_32: +; CHECK-NOLSE-O1: ; %bb.0: +; CHECK-NOLSE-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936 +; CHECK-NOLSE-O1-NEXT: str w2, [x0, #16380] +; CHECK-NOLSE-O1-NEXT: str w2, [x0, w1, sxtw #2] +; CHECK-NOLSE-O1-NEXT: stur w2, [x0, #-256] +; CHECK-NOLSE-O1-NEXT: str w2, [x8] +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomic_store_relaxed_32: +; CHECK-NOLSE-O0: ; %bb.0: +; CHECK-NOLSE-O0-NEXT: str w2, [x0, #16380] +; CHECK-NOLSE-O0-NEXT: str w2, [x0, w1, sxtw #2] +; CHECK-NOLSE-O0-NEXT: stur w2, [x0, #-256] +; CHECK-NOLSE-O0-NEXT: add x8, x0, #291, lsl #12 ; =1191936 +; CHECK-NOLSE-O0-NEXT: str w2, [x8] +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LSE-O1-LABEL: atomic_store_relaxed_32: +; CHECK-LSE-O1: ; %bb.0: +; CHECK-LSE-O1-NEXT: str w2, [x0, #16380] +; CHECK-LSE-O1-NEXT: str w2, [x0, w1, sxtw #2] +; CHECK-LSE-O1-NEXT: stur w2, [x0, #-256] +; CHECK-LSE-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936 +; CHECK-LSE-O1-NEXT: str w2, [x8] +; CHECK-LSE-O1-NEXT: ret +; +; CHECK-LSE-O0-LABEL: atomic_store_relaxed_32: +; CHECK-LSE-O0: ; %bb.0: +; CHECK-LSE-O0-NEXT: str w2, [x0, #16380] +; CHECK-LSE-O0-NEXT: str w2, [x0, w1, sxtw #2] +; CHECK-LSE-O0-NEXT: stur w2, [x0, #-256] +; CHECK-LSE-O0-NEXT: add x8, x0, #291, lsl #12 ; =1191936 +; CHECK-LSE-O0-NEXT: str w2, [x8] +; CHECK-LSE-O0-NEXT: ret %ptr_unsigned = getelementptr i32, 
i32* %p, i32 4095 store atomic i32 %val, i32* %ptr_unsigned monotonic, align 4 @@ -528,17 +1055,41 @@ } define void @atomic_store_relaxed_64(i64* %p, i32 %off32, i64 %val) #0 { -; CHECK-NOLSE-LABEL: atomic_store_relaxed_64: -; CHECK-NOLSE: str x2 -; CHECK-NOLSE: str x2 -; CHECK-NOLSE: stur x2 -; CHECK-NOLSE: str x2 - -; CHECK-LSE-LABEL: atomic_store_relaxed_64: -; CHECK-LSE: str x2 -; CHECK-LSE: str x2 -; CHECK-LSE: stur x2 -; CHECK-LSE: str x2 +; CHECK-NOLSE-O1-LABEL: atomic_store_relaxed_64: +; CHECK-NOLSE-O1: ; %bb.0: +; CHECK-NOLSE-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936 +; CHECK-NOLSE-O1-NEXT: str x2, [x0, #32760] +; CHECK-NOLSE-O1-NEXT: str x2, [x0, w1, sxtw #3] +; CHECK-NOLSE-O1-NEXT: stur x2, [x0, #-256] +; CHECK-NOLSE-O1-NEXT: str x2, [x8] +; CHECK-NOLSE-O1-NEXT: ret +; +; CHECK-NOLSE-O0-LABEL: atomic_store_relaxed_64: +; CHECK-NOLSE-O0: ; %bb.0: +; CHECK-NOLSE-O0-NEXT: str x2, [x0, #32760] +; CHECK-NOLSE-O0-NEXT: str x2, [x0, w1, sxtw #3] +; CHECK-NOLSE-O0-NEXT: stur x2, [x0, #-256] +; CHECK-NOLSE-O0-NEXT: add x8, x0, #291, lsl #12 ; =1191936 +; CHECK-NOLSE-O0-NEXT: str x2, [x8] +; CHECK-NOLSE-O0-NEXT: ret +; +; CHECK-LSE-O1-LABEL: atomic_store_relaxed_64: +; CHECK-LSE-O1: ; %bb.0: +; CHECK-LSE-O1-NEXT: str x2, [x0, #32760] +; CHECK-LSE-O1-NEXT: str x2, [x0, w1, sxtw #3] +; CHECK-LSE-O1-NEXT: stur x2, [x0, #-256] +; CHECK-LSE-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936 +; CHECK-LSE-O1-NEXT: str x2, [x8] +; CHECK-LSE-O1-NEXT: ret +; +; CHECK-LSE-O0-LABEL: atomic_store_relaxed_64: +; CHECK-LSE-O0: ; %bb.0: +; CHECK-LSE-O0-NEXT: str x2, [x0, #32760] +; CHECK-LSE-O0-NEXT: str x2, [x0, w1, sxtw #3] +; CHECK-LSE-O0-NEXT: stur x2, [x0, #-256] +; CHECK-LSE-O0-NEXT: add x8, x0, #291, lsl #12 ; =1191936 +; CHECK-LSE-O0-NEXT: str x2, [x8] +; CHECK-LSE-O0-NEXT: ret %ptr_unsigned = getelementptr i64, i64* %p, i32 4095 store atomic i64 %val, i64* %ptr_unsigned monotonic, align 8 diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll 
b/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll @@ -0,0 +1,697 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=aarch64-- -O0 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s -check-prefix=NOLSE +; RUN: llc -verify-machineinstrs -mtriple=aarch64-- -mattr=+lse -O0 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s -check-prefix=LSE + +; Ensure there's no stack spill in between ldxr/stxr pairs. + +define i8 @test_rmw_add_8(i8* %dst) { +; NOLSE-LABEL: test_rmw_add_8: +; NOLSE: // %bb.0: // %entry +; NOLSE-NEXT: sub sp, sp, #32 // =32 +; NOLSE-NEXT: .cfi_def_cfa_offset 32 +; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: ldrb w8, [x0] +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: b .LBB0_1 +; NOLSE-NEXT: .LBB0_1: // %atomicrmw.start +; NOLSE-NEXT: // =>This Loop Header: Depth=1 +; NOLSE-NEXT: // Child Loop BB0_2 Depth 2 +; NOLSE-NEXT: ldr w9, [sp, #28] // 4-byte Folded Reload +; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: add w12, w9, #1 // =1 +; NOLSE-NEXT: .LBB0_2: // %atomicrmw.start +; NOLSE-NEXT: // Parent Loop BB0_1 Depth=1 +; NOLSE-NEXT: // => This Inner Loop Header: Depth=2 +; NOLSE-NEXT: ldaxrb w8, [x11] +; NOLSE-NEXT: cmp w8, w9, uxtb +; NOLSE-NEXT: b.ne .LBB0_4 +; NOLSE-NEXT: // %bb.3: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB0_2 Depth=2 +; NOLSE-NEXT: stlxrb w10, w12, [x11] +; NOLSE-NEXT: cbnz w10, .LBB0_2 +; NOLSE-NEXT: .LBB0_4: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB0_1 Depth=1 +; NOLSE-NEXT: subs w9, w8, w9, uxtb +; NOLSE-NEXT: cset w9, eq +; NOLSE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill +; NOLSE-NEXT: subs w9, w9, #1 // =1 +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: b.ne .LBB0_1 +; NOLSE-NEXT: b .LBB0_5 +; NOLSE-NEXT: .LBB0_5: // %atomicrmw.end +; 
NOLSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; NOLSE-NEXT: add sp, sp, #32 // =32 +; NOLSE-NEXT: ret +; +; LSE-LABEL: test_rmw_add_8: +; LSE: // %bb.0: // %entry +; LSE-NEXT: mov w8, #1 +; LSE-NEXT: ldaddalb w8, w0, [x0] +; LSE-NEXT: ret +entry: + %res = atomicrmw add i8* %dst, i8 1 seq_cst + ret i8 %res +} + +define i16 @test_rmw_add_16(i16* %dst) { +; NOLSE-LABEL: test_rmw_add_16: +; NOLSE: // %bb.0: // %entry +; NOLSE-NEXT: sub sp, sp, #32 // =32 +; NOLSE-NEXT: .cfi_def_cfa_offset 32 +; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: ldrh w8, [x0] +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: b .LBB1_1 +; NOLSE-NEXT: .LBB1_1: // %atomicrmw.start +; NOLSE-NEXT: // =>This Loop Header: Depth=1 +; NOLSE-NEXT: // Child Loop BB1_2 Depth 2 +; NOLSE-NEXT: ldr w9, [sp, #28] // 4-byte Folded Reload +; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: add w12, w9, #1 // =1 +; NOLSE-NEXT: .LBB1_2: // %atomicrmw.start +; NOLSE-NEXT: // Parent Loop BB1_1 Depth=1 +; NOLSE-NEXT: // => This Inner Loop Header: Depth=2 +; NOLSE-NEXT: ldaxrh w8, [x11] +; NOLSE-NEXT: cmp w8, w9, uxth +; NOLSE-NEXT: b.ne .LBB1_4 +; NOLSE-NEXT: // %bb.3: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB1_2 Depth=2 +; NOLSE-NEXT: stlxrh w10, w12, [x11] +; NOLSE-NEXT: cbnz w10, .LBB1_2 +; NOLSE-NEXT: .LBB1_4: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB1_1 Depth=1 +; NOLSE-NEXT: subs w9, w8, w9, uxth +; NOLSE-NEXT: cset w9, eq +; NOLSE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill +; NOLSE-NEXT: subs w9, w9, #1 // =1 +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: b.ne .LBB1_1 +; NOLSE-NEXT: b .LBB1_5 +; NOLSE-NEXT: .LBB1_5: // %atomicrmw.end +; NOLSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; NOLSE-NEXT: add sp, sp, #32 // =32 +; NOLSE-NEXT: ret +; +; LSE-LABEL: test_rmw_add_16: +; LSE: // %bb.0: // %entry +; LSE-NEXT: mov w8, #1 +; LSE-NEXT: ldaddalh w8, w0, [x0] +; LSE-NEXT: 
ret +entry: + %res = atomicrmw add i16* %dst, i16 1 seq_cst + ret i16 %res +} + +define i32 @test_rmw_add_32(i32* %dst) { +; NOLSE-LABEL: test_rmw_add_32: +; NOLSE: // %bb.0: // %entry +; NOLSE-NEXT: sub sp, sp, #32 // =32 +; NOLSE-NEXT: .cfi_def_cfa_offset 32 +; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: ldr w8, [x0] +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: b .LBB2_1 +; NOLSE-NEXT: .LBB2_1: // %atomicrmw.start +; NOLSE-NEXT: // =>This Loop Header: Depth=1 +; NOLSE-NEXT: // Child Loop BB2_2 Depth 2 +; NOLSE-NEXT: ldr w9, [sp, #28] // 4-byte Folded Reload +; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: add w12, w9, #1 // =1 +; NOLSE-NEXT: .LBB2_2: // %atomicrmw.start +; NOLSE-NEXT: // Parent Loop BB2_1 Depth=1 +; NOLSE-NEXT: // => This Inner Loop Header: Depth=2 +; NOLSE-NEXT: ldaxr w8, [x11] +; NOLSE-NEXT: cmp w8, w9 +; NOLSE-NEXT: b.ne .LBB2_4 +; NOLSE-NEXT: // %bb.3: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB2_2 Depth=2 +; NOLSE-NEXT: stlxr w10, w12, [x11] +; NOLSE-NEXT: cbnz w10, .LBB2_2 +; NOLSE-NEXT: .LBB2_4: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB2_1 Depth=1 +; NOLSE-NEXT: subs w9, w8, w9 +; NOLSE-NEXT: cset w9, eq +; NOLSE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill +; NOLSE-NEXT: subs w9, w9, #1 // =1 +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: b.ne .LBB2_1 +; NOLSE-NEXT: b .LBB2_5 +; NOLSE-NEXT: .LBB2_5: // %atomicrmw.end +; NOLSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; NOLSE-NEXT: add sp, sp, #32 // =32 +; NOLSE-NEXT: ret +; +; LSE-LABEL: test_rmw_add_32: +; LSE: // %bb.0: // %entry +; LSE-NEXT: mov w8, #1 +; LSE-NEXT: ldaddal w8, w0, [x0] +; LSE-NEXT: ret +entry: + %res = atomicrmw add i32* %dst, i32 1 seq_cst + ret i32 %res +} + +define i64 @test_rmw_add_64(i64* %dst) { +; NOLSE-LABEL: test_rmw_add_64: +; NOLSE: // %bb.0: // %entry +; NOLSE-NEXT: sub sp, sp, #32 // =32 +; NOLSE-NEXT: .cfi_def_cfa_offset 
32 +; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: ldr x8, [x0] +; NOLSE-NEXT: str x8, [sp, #24] // 8-byte Folded Spill +; NOLSE-NEXT: b .LBB3_1 +; NOLSE-NEXT: .LBB3_1: // %atomicrmw.start +; NOLSE-NEXT: // =>This Loop Header: Depth=1 +; NOLSE-NEXT: // Child Loop BB3_2 Depth 2 +; NOLSE-NEXT: ldr x9, [sp, #24] // 8-byte Folded Reload +; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: add x12, x9, #1 // =1 +; NOLSE-NEXT: .LBB3_2: // %atomicrmw.start +; NOLSE-NEXT: // Parent Loop BB3_1 Depth=1 +; NOLSE-NEXT: // => This Inner Loop Header: Depth=2 +; NOLSE-NEXT: ldaxr x8, [x11] +; NOLSE-NEXT: cmp x8, x9 +; NOLSE-NEXT: b.ne .LBB3_4 +; NOLSE-NEXT: // %bb.3: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB3_2 Depth=2 +; NOLSE-NEXT: stlxr w10, x12, [x11] +; NOLSE-NEXT: cbnz w10, .LBB3_2 +; NOLSE-NEXT: .LBB3_4: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB3_1 Depth=1 +; NOLSE-NEXT: subs x9, x8, x9 +; NOLSE-NEXT: cset w9, eq +; NOLSE-NEXT: str x8, [sp, #8] // 8-byte Folded Spill +; NOLSE-NEXT: subs w9, w9, #1 // =1 +; NOLSE-NEXT: str x8, [sp, #24] // 8-byte Folded Spill +; NOLSE-NEXT: b.ne .LBB3_1 +; NOLSE-NEXT: b .LBB3_5 +; NOLSE-NEXT: .LBB3_5: // %atomicrmw.end +; NOLSE-NEXT: ldr x0, [sp, #8] // 8-byte Folded Reload +; NOLSE-NEXT: add sp, sp, #32 // =32 +; NOLSE-NEXT: ret +; +; LSE-LABEL: test_rmw_add_64: +; LSE: // %bb.0: // %entry +; LSE-NEXT: mov w8, #1 +; LSE-NEXT: // kill: def $x8 killed $w8 +; LSE-NEXT: ldaddal x8, x0, [x0] +; LSE-NEXT: ret +entry: + %res = atomicrmw add i64* %dst, i64 1 seq_cst + ret i64 %res +} + +define i128 @test_rmw_add_128(i128* %dst) { +; NOLSE-LABEL: test_rmw_add_128: +; NOLSE: // %bb.0: // %entry +; NOLSE-NEXT: sub sp, sp, #48 // =48 +; NOLSE-NEXT: .cfi_def_cfa_offset 48 +; NOLSE-NEXT: str x0, [sp, #24] // 8-byte Folded Spill +; NOLSE-NEXT: ldr x8, [x0, #8] +; NOLSE-NEXT: ldr x9, [x0] +; NOLSE-NEXT: str x9, [sp, #32] // 8-byte Folded Spill +; NOLSE-NEXT: str x8, [sp, #40] // 
8-byte Folded Spill +; NOLSE-NEXT: b .LBB4_1 +; NOLSE-NEXT: .LBB4_1: // %atomicrmw.start +; NOLSE-NEXT: // =>This Loop Header: Depth=1 +; NOLSE-NEXT: // Child Loop BB4_2 Depth 2 +; NOLSE-NEXT: ldr x11, [sp, #40] // 8-byte Folded Reload +; NOLSE-NEXT: ldr x8, [sp, #32] // 8-byte Folded Reload +; NOLSE-NEXT: ldr x13, [sp, #24] // 8-byte Folded Reload +; NOLSE-NEXT: adds x14, x8, #1 // =1 +; NOLSE-NEXT: mov x9, xzr +; NOLSE-NEXT: adcs x15, x11, x9 +; NOLSE-NEXT: .LBB4_2: // %atomicrmw.start +; NOLSE-NEXT: // Parent Loop BB4_1 Depth=1 +; NOLSE-NEXT: // => This Inner Loop Header: Depth=2 +; NOLSE-NEXT: ldaxp x10, x9, [x13] +; NOLSE-NEXT: cmp x10, x8 +; NOLSE-NEXT: cset w12, ne +; NOLSE-NEXT: cmp x9, x11 +; NOLSE-NEXT: cinc w12, w12, ne +; NOLSE-NEXT: cbnz w12, .LBB4_4 +; NOLSE-NEXT: // %bb.3: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB4_2 Depth=2 +; NOLSE-NEXT: stlxp w12, x14, x15, [x13] +; NOLSE-NEXT: cbnz w12, .LBB4_2 +; NOLSE-NEXT: .LBB4_4: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB4_1 Depth=1 +; NOLSE-NEXT: eor x11, x9, x11 +; NOLSE-NEXT: eor x8, x10, x8 +; NOLSE-NEXT: orr x8, x8, x11 +; NOLSE-NEXT: str x9, [sp, #8] // 8-byte Folded Spill +; NOLSE-NEXT: str x10, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: str x10, [sp, #32] // 8-byte Folded Spill +; NOLSE-NEXT: str x9, [sp, #40] // 8-byte Folded Spill +; NOLSE-NEXT: cbnz x8, .LBB4_1 +; NOLSE-NEXT: b .LBB4_5 +; NOLSE-NEXT: .LBB4_5: // %atomicrmw.end +; NOLSE-NEXT: ldr x1, [sp, #8] // 8-byte Folded Reload +; NOLSE-NEXT: ldr x0, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: add sp, sp, #48 // =48 +; NOLSE-NEXT: ret +; +; LSE-LABEL: test_rmw_add_128: +; LSE: // %bb.0: // %entry +; LSE-NEXT: sub sp, sp, #80 // =80 +; LSE-NEXT: .cfi_def_cfa_offset 80 +; LSE-NEXT: str x0, [sp, #56] // 8-byte Folded Spill +; LSE-NEXT: ldr x8, [x0, #8] +; LSE-NEXT: ldr x9, [x0] +; LSE-NEXT: str x9, [sp, #64] // 8-byte Folded Spill +; LSE-NEXT: str x8, [sp, #72] // 8-byte Folded Spill +; LSE-NEXT: b .LBB4_1 
+; LSE-NEXT: .LBB4_1: // %atomicrmw.start +; LSE-NEXT: // =>This Inner Loop Header: Depth=1 +; LSE-NEXT: ldr x10, [sp, #72] // 8-byte Folded Reload +; LSE-NEXT: ldr x8, [sp, #64] // 8-byte Folded Reload +; LSE-NEXT: ldr x9, [sp, #56] // 8-byte Folded Reload +; LSE-NEXT: adds x2, x8, #1 // =1 +; LSE-NEXT: mov x11, xzr +; LSE-NEXT: adcs x11, x10, x11 +; LSE-NEXT: // kill: def $x2 killed $x2 def $x2_x3 +; LSE-NEXT: mov x3, x11 +; LSE-NEXT: mov x0, x8 +; LSE-NEXT: mov x1, x10 +; LSE-NEXT: stp x0, x1, [sp, #8] // 16-byte Folded Spill +; LSE-NEXT: caspal x0, x1, x2, x3, [x9] +; LSE-NEXT: stp x0, x1, [sp, #24] // 16-byte Folded Spill +; LSE-NEXT: mov x9, x1 +; LSE-NEXT: str x9, [sp, #40] // 8-byte Folded Spill +; LSE-NEXT: eor x11, x9, x10 +; LSE-NEXT: mov x10, x0 +; LSE-NEXT: str x10, [sp, #48] // 8-byte Folded Spill +; LSE-NEXT: eor x8, x10, x8 +; LSE-NEXT: orr x8, x8, x11 +; LSE-NEXT: str x10, [sp, #64] // 8-byte Folded Spill +; LSE-NEXT: str x9, [sp, #72] // 8-byte Folded Spill +; LSE-NEXT: cbnz x8, .LBB4_1 +; LSE-NEXT: b .LBB4_2 +; LSE-NEXT: .LBB4_2: // %atomicrmw.end +; LSE-NEXT: ldr x1, [sp, #40] // 8-byte Folded Reload +; LSE-NEXT: ldr x0, [sp, #48] // 8-byte Folded Reload +; LSE-NEXT: add sp, sp, #80 // =80 +; LSE-NEXT: ret +entry: + %res = atomicrmw add i128* %dst, i128 1 seq_cst + ret i128 %res +} +define i8 @test_rmw_nand_8(i8* %dst) { +; NOLSE-LABEL: test_rmw_nand_8: +; NOLSE: // %bb.0: // %entry +; NOLSE-NEXT: sub sp, sp, #32 // =32 +; NOLSE-NEXT: .cfi_def_cfa_offset 32 +; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: ldrb w8, [x0] +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: b .LBB5_1 +; NOLSE-NEXT: .LBB5_1: // %atomicrmw.start +; NOLSE-NEXT: // =>This Loop Header: Depth=1 +; NOLSE-NEXT: // Child Loop BB5_2 Depth 2 +; NOLSE-NEXT: ldr w9, [sp, #28] // 4-byte Folded Reload +; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: mvn w8, w9 +; NOLSE-NEXT: orr w12, w8, #0xfffffffe +; NOLSE-NEXT: 
.LBB5_2: // %atomicrmw.start +; NOLSE-NEXT: // Parent Loop BB5_1 Depth=1 +; NOLSE-NEXT: // => This Inner Loop Header: Depth=2 +; NOLSE-NEXT: ldaxrb w8, [x11] +; NOLSE-NEXT: cmp w8, w9, uxtb +; NOLSE-NEXT: b.ne .LBB5_4 +; NOLSE-NEXT: // %bb.3: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB5_2 Depth=2 +; NOLSE-NEXT: stlxrb w10, w12, [x11] +; NOLSE-NEXT: cbnz w10, .LBB5_2 +; NOLSE-NEXT: .LBB5_4: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB5_1 Depth=1 +; NOLSE-NEXT: subs w9, w8, w9, uxtb +; NOLSE-NEXT: cset w9, eq +; NOLSE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill +; NOLSE-NEXT: subs w9, w9, #1 // =1 +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: b.ne .LBB5_1 +; NOLSE-NEXT: b .LBB5_5 +; NOLSE-NEXT: .LBB5_5: // %atomicrmw.end +; NOLSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; NOLSE-NEXT: add sp, sp, #32 // =32 +; NOLSE-NEXT: ret +; +; LSE-LABEL: test_rmw_nand_8: +; LSE: // %bb.0: // %entry +; LSE-NEXT: sub sp, sp, #32 // =32 +; LSE-NEXT: .cfi_def_cfa_offset 32 +; LSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; LSE-NEXT: ldrb w8, [x0] +; LSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; LSE-NEXT: b .LBB5_1 +; LSE-NEXT: .LBB5_1: // %atomicrmw.start +; LSE-NEXT: // =>This Inner Loop Header: Depth=1 +; LSE-NEXT: ldr w9, [sp, #28] // 4-byte Folded Reload +; LSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; LSE-NEXT: mvn w8, w9 +; LSE-NEXT: orr w10, w8, #0xfffffffe +; LSE-NEXT: mov w8, w9 +; LSE-NEXT: casalb w8, w10, [x11] +; LSE-NEXT: str w8, [sp, #8] // 4-byte Folded Spill +; LSE-NEXT: subs w9, w8, w9, uxtb +; LSE-NEXT: cset w9, eq +; LSE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill +; LSE-NEXT: subs w9, w9, #1 // =1 +; LSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; LSE-NEXT: b.ne .LBB5_1 +; LSE-NEXT: b .LBB5_2 +; LSE-NEXT: .LBB5_2: // %atomicrmw.end +; LSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; LSE-NEXT: add sp, sp, #32 // =32 +; LSE-NEXT: ret +entry: + %res = atomicrmw nand 
i8* %dst, i8 1 seq_cst + ret i8 %res +} + +define i16 @test_rmw_nand_16(i16* %dst) { +; NOLSE-LABEL: test_rmw_nand_16: +; NOLSE: // %bb.0: // %entry +; NOLSE-NEXT: sub sp, sp, #32 // =32 +; NOLSE-NEXT: .cfi_def_cfa_offset 32 +; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: ldrh w8, [x0] +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: b .LBB6_1 +; NOLSE-NEXT: .LBB6_1: // %atomicrmw.start +; NOLSE-NEXT: // =>This Loop Header: Depth=1 +; NOLSE-NEXT: // Child Loop BB6_2 Depth 2 +; NOLSE-NEXT: ldr w9, [sp, #28] // 4-byte Folded Reload +; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: mvn w8, w9 +; NOLSE-NEXT: orr w12, w8, #0xfffffffe +; NOLSE-NEXT: .LBB6_2: // %atomicrmw.start +; NOLSE-NEXT: // Parent Loop BB6_1 Depth=1 +; NOLSE-NEXT: // => This Inner Loop Header: Depth=2 +; NOLSE-NEXT: ldaxrh w8, [x11] +; NOLSE-NEXT: cmp w8, w9, uxth +; NOLSE-NEXT: b.ne .LBB6_4 +; NOLSE-NEXT: // %bb.3: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB6_2 Depth=2 +; NOLSE-NEXT: stlxrh w10, w12, [x11] +; NOLSE-NEXT: cbnz w10, .LBB6_2 +; NOLSE-NEXT: .LBB6_4: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB6_1 Depth=1 +; NOLSE-NEXT: subs w9, w8, w9, uxth +; NOLSE-NEXT: cset w9, eq +; NOLSE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill +; NOLSE-NEXT: subs w9, w9, #1 // =1 +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: b.ne .LBB6_1 +; NOLSE-NEXT: b .LBB6_5 +; NOLSE-NEXT: .LBB6_5: // %atomicrmw.end +; NOLSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; NOLSE-NEXT: add sp, sp, #32 // =32 +; NOLSE-NEXT: ret +; +; LSE-LABEL: test_rmw_nand_16: +; LSE: // %bb.0: // %entry +; LSE-NEXT: sub sp, sp, #32 // =32 +; LSE-NEXT: .cfi_def_cfa_offset 32 +; LSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; LSE-NEXT: ldrh w8, [x0] +; LSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; LSE-NEXT: b .LBB6_1 +; LSE-NEXT: .LBB6_1: // %atomicrmw.start +; LSE-NEXT: // =>This Inner Loop Header: 
Depth=1 +; LSE-NEXT: ldr w9, [sp, #28] // 4-byte Folded Reload +; LSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; LSE-NEXT: mvn w8, w9 +; LSE-NEXT: orr w10, w8, #0xfffffffe +; LSE-NEXT: mov w8, w9 +; LSE-NEXT: casalh w8, w10, [x11] +; LSE-NEXT: str w8, [sp, #8] // 4-byte Folded Spill +; LSE-NEXT: subs w9, w8, w9, uxth +; LSE-NEXT: cset w9, eq +; LSE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill +; LSE-NEXT: subs w9, w9, #1 // =1 +; LSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; LSE-NEXT: b.ne .LBB6_1 +; LSE-NEXT: b .LBB6_2 +; LSE-NEXT: .LBB6_2: // %atomicrmw.end +; LSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; LSE-NEXT: add sp, sp, #32 // =32 +; LSE-NEXT: ret +entry: + %res = atomicrmw nand i16* %dst, i16 1 seq_cst + ret i16 %res +} + +define i32 @test_rmw_nand_32(i32* %dst) { +; NOLSE-LABEL: test_rmw_nand_32: +; NOLSE: // %bb.0: // %entry +; NOLSE-NEXT: sub sp, sp, #32 // =32 +; NOLSE-NEXT: .cfi_def_cfa_offset 32 +; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: ldr w8, [x0] +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: b .LBB7_1 +; NOLSE-NEXT: .LBB7_1: // %atomicrmw.start +; NOLSE-NEXT: // =>This Loop Header: Depth=1 +; NOLSE-NEXT: // Child Loop BB7_2 Depth 2 +; NOLSE-NEXT: ldr w9, [sp, #28] // 4-byte Folded Reload +; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: mvn w8, w9 +; NOLSE-NEXT: orr w12, w8, #0xfffffffe +; NOLSE-NEXT: .LBB7_2: // %atomicrmw.start +; NOLSE-NEXT: // Parent Loop BB7_1 Depth=1 +; NOLSE-NEXT: // => This Inner Loop Header: Depth=2 +; NOLSE-NEXT: ldaxr w8, [x11] +; NOLSE-NEXT: cmp w8, w9 +; NOLSE-NEXT: b.ne .LBB7_4 +; NOLSE-NEXT: // %bb.3: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB7_2 Depth=2 +; NOLSE-NEXT: stlxr w10, w12, [x11] +; NOLSE-NEXT: cbnz w10, .LBB7_2 +; NOLSE-NEXT: .LBB7_4: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB7_1 Depth=1 +; NOLSE-NEXT: subs w9, w8, w9 +; NOLSE-NEXT: cset w9, eq +; NOLSE-NEXT: str w8, [sp, 
#12] // 4-byte Folded Spill +; NOLSE-NEXT: subs w9, w9, #1 // =1 +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: b.ne .LBB7_1 +; NOLSE-NEXT: b .LBB7_5 +; NOLSE-NEXT: .LBB7_5: // %atomicrmw.end +; NOLSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; NOLSE-NEXT: add sp, sp, #32 // =32 +; NOLSE-NEXT: ret +; +; LSE-LABEL: test_rmw_nand_32: +; LSE: // %bb.0: // %entry +; LSE-NEXT: sub sp, sp, #32 // =32 +; LSE-NEXT: .cfi_def_cfa_offset 32 +; LSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; LSE-NEXT: ldr w8, [x0] +; LSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; LSE-NEXT: b .LBB7_1 +; LSE-NEXT: .LBB7_1: // %atomicrmw.start +; LSE-NEXT: // =>This Inner Loop Header: Depth=1 +; LSE-NEXT: ldr w9, [sp, #28] // 4-byte Folded Reload +; LSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; LSE-NEXT: mvn w8, w9 +; LSE-NEXT: orr w10, w8, #0xfffffffe +; LSE-NEXT: mov w8, w9 +; LSE-NEXT: casal w8, w10, [x11] +; LSE-NEXT: str w8, [sp, #8] // 4-byte Folded Spill +; LSE-NEXT: subs w9, w8, w9 +; LSE-NEXT: cset w9, eq +; LSE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill +; LSE-NEXT: subs w9, w9, #1 // =1 +; LSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; LSE-NEXT: b.ne .LBB7_1 +; LSE-NEXT: b .LBB7_2 +; LSE-NEXT: .LBB7_2: // %atomicrmw.end +; LSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; LSE-NEXT: add sp, sp, #32 // =32 +; LSE-NEXT: ret +entry: + %res = atomicrmw nand i32* %dst, i32 1 seq_cst + ret i32 %res +} + +define i64 @test_rmw_nand_64(i64* %dst) { +; NOLSE-LABEL: test_rmw_nand_64: +; NOLSE: // %bb.0: // %entry +; NOLSE-NEXT: sub sp, sp, #32 // =32 +; NOLSE-NEXT: .cfi_def_cfa_offset 32 +; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: ldr x8, [x0] +; NOLSE-NEXT: str x8, [sp, #24] // 8-byte Folded Spill +; NOLSE-NEXT: b .LBB8_1 +; NOLSE-NEXT: .LBB8_1: // %atomicrmw.start +; NOLSE-NEXT: // =>This Loop Header: Depth=1 +; NOLSE-NEXT: // Child Loop BB8_2 Depth 2 +; NOLSE-NEXT: ldr x9, [sp, #24] // 8-byte Folded 
Reload +; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: mov w8, w9 +; NOLSE-NEXT: mvn w10, w8 +; NOLSE-NEXT: // implicit-def: $x8 +; NOLSE-NEXT: mov w8, w10 +; NOLSE-NEXT: orr x12, x8, #0xfffffffffffffffe +; NOLSE-NEXT: .LBB8_2: // %atomicrmw.start +; NOLSE-NEXT: // Parent Loop BB8_1 Depth=1 +; NOLSE-NEXT: // => This Inner Loop Header: Depth=2 +; NOLSE-NEXT: ldaxr x8, [x11] +; NOLSE-NEXT: cmp x8, x9 +; NOLSE-NEXT: b.ne .LBB8_4 +; NOLSE-NEXT: // %bb.3: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB8_2 Depth=2 +; NOLSE-NEXT: stlxr w10, x12, [x11] +; NOLSE-NEXT: cbnz w10, .LBB8_2 +; NOLSE-NEXT: .LBB8_4: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB8_1 Depth=1 +; NOLSE-NEXT: subs x9, x8, x9 +; NOLSE-NEXT: cset w9, eq +; NOLSE-NEXT: str x8, [sp, #8] // 8-byte Folded Spill +; NOLSE-NEXT: subs w9, w9, #1 // =1 +; NOLSE-NEXT: str x8, [sp, #24] // 8-byte Folded Spill +; NOLSE-NEXT: b.ne .LBB8_1 +; NOLSE-NEXT: b .LBB8_5 +; NOLSE-NEXT: .LBB8_5: // %atomicrmw.end +; NOLSE-NEXT: ldr x0, [sp, #8] // 8-byte Folded Reload +; NOLSE-NEXT: add sp, sp, #32 // =32 +; NOLSE-NEXT: ret +; +; LSE-LABEL: test_rmw_nand_64: +; LSE: // %bb.0: // %entry +; LSE-NEXT: sub sp, sp, #32 // =32 +; LSE-NEXT: .cfi_def_cfa_offset 32 +; LSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; LSE-NEXT: ldr x8, [x0] +; LSE-NEXT: str x8, [sp, #24] // 8-byte Folded Spill +; LSE-NEXT: b .LBB8_1 +; LSE-NEXT: .LBB8_1: // %atomicrmw.start +; LSE-NEXT: // =>This Inner Loop Header: Depth=1 +; LSE-NEXT: ldr x9, [sp, #24] // 8-byte Folded Reload +; LSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; LSE-NEXT: mov w8, w9 +; LSE-NEXT: mvn w10, w8 +; LSE-NEXT: // implicit-def: $x8 +; LSE-NEXT: mov w8, w10 +; LSE-NEXT: orr x10, x8, #0xfffffffffffffffe +; LSE-NEXT: mov x8, x9 +; LSE-NEXT: casal x8, x10, [x11] +; LSE-NEXT: str x8, [sp] // 8-byte Folded Spill +; LSE-NEXT: subs x9, x8, x9 +; LSE-NEXT: cset w9, eq +; LSE-NEXT: str x8, [sp, #8] // 8-byte Folded Spill +; LSE-NEXT: 
subs w9, w9, #1 // =1 +; LSE-NEXT: str x8, [sp, #24] // 8-byte Folded Spill +; LSE-NEXT: b.ne .LBB8_1 +; LSE-NEXT: b .LBB8_2 +; LSE-NEXT: .LBB8_2: // %atomicrmw.end +; LSE-NEXT: ldr x0, [sp, #8] // 8-byte Folded Reload +; LSE-NEXT: add sp, sp, #32 // =32 +; LSE-NEXT: ret +entry: + %res = atomicrmw nand i64* %dst, i64 1 seq_cst + ret i64 %res +} + +define i128 @test_rmw_nand_128(i128* %dst) { +; NOLSE-LABEL: test_rmw_nand_128: +; NOLSE: // %bb.0: // %entry +; NOLSE-NEXT: sub sp, sp, #48 // =48 +; NOLSE-NEXT: .cfi_def_cfa_offset 48 +; NOLSE-NEXT: str x0, [sp, #24] // 8-byte Folded Spill +; NOLSE-NEXT: ldr x8, [x0, #8] +; NOLSE-NEXT: ldr x9, [x0] +; NOLSE-NEXT: str x9, [sp, #32] // 8-byte Folded Spill +; NOLSE-NEXT: str x8, [sp, #40] // 8-byte Folded Spill +; NOLSE-NEXT: b .LBB9_1 +; NOLSE-NEXT: .LBB9_1: // %atomicrmw.start +; NOLSE-NEXT: // =>This Loop Header: Depth=1 +; NOLSE-NEXT: // Child Loop BB9_2 Depth 2 +; NOLSE-NEXT: ldr x11, [sp, #40] // 8-byte Folded Reload +; NOLSE-NEXT: ldr x8, [sp, #32] // 8-byte Folded Reload +; NOLSE-NEXT: ldr x13, [sp, #24] // 8-byte Folded Reload +; NOLSE-NEXT: mov w9, w8 +; NOLSE-NEXT: mvn w10, w9 +; NOLSE-NEXT: // implicit-def: $x9 +; NOLSE-NEXT: mov w9, w10 +; NOLSE-NEXT: orr x14, x9, #0xfffffffffffffffe +; NOLSE-NEXT: mov x15, #-1 +; NOLSE-NEXT: .LBB9_2: // %atomicrmw.start +; NOLSE-NEXT: // Parent Loop BB9_1 Depth=1 +; NOLSE-NEXT: // => This Inner Loop Header: Depth=2 +; NOLSE-NEXT: ldaxp x10, x9, [x13] +; NOLSE-NEXT: cmp x10, x8 +; NOLSE-NEXT: cset w12, ne +; NOLSE-NEXT: cmp x9, x11 +; NOLSE-NEXT: cinc w12, w12, ne +; NOLSE-NEXT: cbnz w12, .LBB9_4 +; NOLSE-NEXT: // %bb.3: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB9_2 Depth=2 +; NOLSE-NEXT: stlxp w12, x14, x15, [x13] +; NOLSE-NEXT: cbnz w12, .LBB9_2 +; NOLSE-NEXT: .LBB9_4: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB9_1 Depth=1 +; NOLSE-NEXT: eor x11, x9, x11 +; NOLSE-NEXT: eor x8, x10, x8 +; NOLSE-NEXT: orr x8, x8, x11 +; NOLSE-NEXT: str x9, [sp, #8] 
// 8-byte Folded Spill +; NOLSE-NEXT: str x10, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: str x10, [sp, #32] // 8-byte Folded Spill +; NOLSE-NEXT: str x9, [sp, #40] // 8-byte Folded Spill +; NOLSE-NEXT: cbnz x8, .LBB9_1 +; NOLSE-NEXT: b .LBB9_5 +; NOLSE-NEXT: .LBB9_5: // %atomicrmw.end +; NOLSE-NEXT: ldr x1, [sp, #8] // 8-byte Folded Reload +; NOLSE-NEXT: ldr x0, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: add sp, sp, #48 // =48 +; NOLSE-NEXT: ret +; +; LSE-LABEL: test_rmw_nand_128: +; LSE: // %bb.0: // %entry +; LSE-NEXT: sub sp, sp, #80 // =80 +; LSE-NEXT: .cfi_def_cfa_offset 80 +; LSE-NEXT: str x0, [sp, #56] // 8-byte Folded Spill +; LSE-NEXT: ldr x8, [x0, #8] +; LSE-NEXT: ldr x9, [x0] +; LSE-NEXT: str x9, [sp, #64] // 8-byte Folded Spill +; LSE-NEXT: str x8, [sp, #72] // 8-byte Folded Spill +; LSE-NEXT: b .LBB9_1 +; LSE-NEXT: .LBB9_1: // %atomicrmw.start +; LSE-NEXT: // =>This Inner Loop Header: Depth=1 +; LSE-NEXT: ldr x10, [sp, #72] // 8-byte Folded Reload +; LSE-NEXT: ldr x8, [sp, #64] // 8-byte Folded Reload +; LSE-NEXT: ldr x9, [sp, #56] // 8-byte Folded Reload +; LSE-NEXT: mov x0, x8 +; LSE-NEXT: mov x1, x10 +; LSE-NEXT: stp x0, x1, [sp, #8] // 16-byte Folded Spill +; LSE-NEXT: mov w11, w8 +; LSE-NEXT: mvn w12, w11 +; LSE-NEXT: // implicit-def: $x11 +; LSE-NEXT: mov w11, w12 +; LSE-NEXT: orr x2, x11, #0xfffffffffffffffe +; LSE-NEXT: mov x11, #-1 +; LSE-NEXT: // kill: def $x2 killed $x2 def $x2_x3 +; LSE-NEXT: mov x3, x11 +; LSE-NEXT: caspal x0, x1, x2, x3, [x9] +; LSE-NEXT: stp x0, x1, [sp, #24] // 16-byte Folded Spill +; LSE-NEXT: mov x9, x1 +; LSE-NEXT: str x9, [sp, #40] // 8-byte Folded Spill +; LSE-NEXT: eor x11, x9, x10 +; LSE-NEXT: mov x10, x0 +; LSE-NEXT: str x10, [sp, #48] // 8-byte Folded Spill +; LSE-NEXT: eor x8, x10, x8 +; LSE-NEXT: orr x8, x8, x11 +; LSE-NEXT: str x10, [sp, #64] // 8-byte Folded Spill +; LSE-NEXT: str x9, [sp, #72] // 8-byte Folded Spill +; LSE-NEXT: cbnz x8, .LBB9_1 +; LSE-NEXT: b .LBB9_2 +; LSE-NEXT: .LBB9_2: // 
%atomicrmw.end +; LSE-NEXT: ldr x1, [sp, #40] // 8-byte Folded Reload +; LSE-NEXT: ldr x0, [sp, #48] // 8-byte Folded Reload +; LSE-NEXT: add sp, sp, #80 // =80 +; LSE-NEXT: ret +entry: + %res = atomicrmw nand i128* %dst, i128 1 seq_cst + ret i128 %res +} diff --git a/llvm/test/CodeGen/X86/note-cet-property.ll b/llvm/test/CodeGen/X86/note-cet-property.ll --- a/llvm/test/CodeGen/X86/note-cet-property.ll +++ b/llvm/test/CodeGen/X86/note-cet-property.ll @@ -1,5 +1,6 @@ ; RUN: llc -mtriple i686-pc-linux < %s | FileCheck %s --check-prefix=X86 ; RUN: llc -mtriple x86_64-pc-linux < %s | FileCheck %s --check-prefix=X86_64 +; RUN: llc -mtriple x86_64-pc-linux-gnux32 < %s | FileCheck %s --check-prefix=X86 ; This test checks that the compiler emits a .note.gnu.property section for ; modules with "cf-protection" module flags. diff --git a/llvm/test/CodeGen/X86/slow-unaligned-mem.ll b/llvm/test/CodeGen/X86/slow-unaligned-mem.ll --- a/llvm/test/CodeGen/X86/slow-unaligned-mem.ll +++ b/llvm/test/CodeGen/X86/slow-unaligned-mem.ll @@ -48,6 +48,7 @@ ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver4 2>&1 | FileCheck %s --check-prefix=FAST ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver1 2>&1 | FileCheck %s --check-prefix=FAST ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver2 2>&1 | FileCheck %s --check-prefix=FAST +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver3 2>&1 | FileCheck %s --check-prefix=FAST ; Other chips with slow unaligned memory accesses diff --git a/llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll b/llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll --- a/llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll +++ b/llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll @@ -14,6 +14,7 @@ ; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver4 | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1 | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s ; Verify 
that for the X86_64 processors that are known to have poor latency ; double precision shift instructions we do not generate 'shld' or 'shrd' diff --git a/llvm/test/DebugInfo/NVPTX/debug-info.ll b/llvm/test/DebugInfo/NVPTX/debug-info.ll --- a/llvm/test/DebugInfo/NVPTX/debug-info.ll +++ b/llvm/test/DebugInfo/NVPTX/debug-info.ll @@ -702,12 +702,12 @@ ; CHECK-NEXT: } ; CHECK-NEXT: .section .debug_info ; CHECK-NEXT: { -; CHECK-NEXT:.b32 10034 // Length of Unit +; CHECK-NEXT:.b32 10029 // Length of Unit ; CHECK-NEXT:.b8 2 // DWARF version number ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b32 .debug_abbrev // Offset Into Abbrev. Section ; CHECK-NEXT:.b8 8 // Address Size (in bytes) -; CHECK-NEXT:.b8 1 // Abbrev [1] 0xb:0x272b DW_TAG_compile_unit +; CHECK-NEXT:.b8 1 // Abbrev [1] 0xb:0x2726 DW_TAG_compile_unit ; CHECK-NEXT:.b8 0 // DW_AT_producer ; CHECK-NEXT:.b8 4 // DW_AT_language ; CHECK-NEXT:.b8 0 @@ -8306,7 +8306,7 @@ ; CHECK-NEXT:.b8 3 // DW_AT_decl_line ; CHECK-NEXT:.b32 3345 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 40 // Abbrev [40] 0x2671:0xc4 DW_TAG_subprogram +; CHECK-NEXT:.b8 40 // Abbrev [40] 0x2671:0xbf DW_TAG_subprogram ; CHECK-NEXT:.b64 Lfunc_begin0 // DW_AT_low_pc ; CHECK-NEXT:.b64 Lfunc_end0 // DW_AT_high_pc ; CHECK-NEXT:.b8 1 // DW_AT_frame_base @@ -8386,7 +8386,7 @@ ; CHECK-NEXT:.b8 12 // DW_AT_call_file ; CHECK-NEXT:.b8 6 // DW_AT_call_line ; CHECK-NEXT:.b8 37 // DW_AT_call_column -; CHECK-NEXT:.b8 43 // Abbrev [43] 0x2711:0x23 DW_TAG_inlined_subroutine +; CHECK-NEXT:.b8 43 // Abbrev [43] 0x2711:0x1e DW_TAG_inlined_subroutine ; CHECK-NEXT:.b32 9791 // DW_AT_abstract_origin ; CHECK-NEXT:.b64 Ltmp9 // DW_AT_low_pc ; CHECK-NEXT:.b64 Ltmp10 // DW_AT_high_pc @@ -8395,8 +8395,6 @@ ; CHECK-NEXT:.b8 5 // DW_AT_call_column ; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2729:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 9820 // DW_AT_abstract_origin -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x272e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 9829 // 
DW_AT_abstract_origin ; CHECK-NEXT:.b8 0 // End Of Children Mark ; CHECK-NEXT:.b8 0 // End Of Children Mark ; CHECK-NEXT:.b8 0 // End Of Children Mark diff --git a/llvm/test/DebugInfo/salvage-gep.ll b/llvm/test/DebugInfo/salvage-gep.ll deleted file mode 100644 --- a/llvm/test/DebugInfo/salvage-gep.ll +++ /dev/null @@ -1,56 +0,0 @@ -; RUN: opt %s -dce -S | FileCheck %s - -; Tests the salvaging of GEP instructions, specifically struct indexing and -; non-constant array indexing. - -%struct.S = type { i32, i32 } - -; CHECK: call void @llvm.dbg.value(metadata !DIArgList(%struct.S* %ptr, i64 %offset), -; CHECK-SAME: ![[VAR_OFFSET_PTR:[0-9]+]], -; CHECK-SAME: !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_constu, 8, DW_OP_mul, DW_OP_plus, DW_OP_plus_uconst, 4, DW_OP_stack_value)) - -; CHECK: ![[VAR_OFFSET_PTR]] = !DILocalVariable(name: "offset_ptr" - -define void @"?foo@@YAXPEAUS@@_J@Z"(%struct.S* %ptr, i64 %offset) !dbg !8 { -entry: - call void @llvm.dbg.value(metadata i64 %offset, metadata !20, metadata !DIExpression()), !dbg !24 - call void @llvm.dbg.value(metadata %struct.S* %ptr, metadata !21, metadata !DIExpression()), !dbg !24 - %arrayidx = getelementptr inbounds %struct.S, %struct.S* %ptr, i64 %offset, !dbg !25 - %b = getelementptr inbounds %struct.S, %struct.S* %arrayidx, i32 0, i32 1, !dbg !25 - call void @llvm.dbg.value(metadata i32* %b, metadata !22, metadata !DIExpression()), !dbg !24 - ret void, !dbg !26 -} - -declare void @llvm.dbg.value(metadata, metadata, metadata) - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!3, !4, !5, !6} -!llvm.ident = !{!7} - -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 11.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) -!1 = !DIFile(filename: "salvage-gep.cpp", directory: "/") -!2 = !{} -!3 = !{i32 2, !"CodeView", i32 1} -!4 = !{i32 2, !"Debug Info Version", i32 3} -!5 = !{i32 1, !"wchar_size", i32 2} -!6 = !{i32 
7, !"PIC Level", i32 2} -!7 = !{!"clang version 11.0.0"} -!8 = distinct !DISubprogram(name: "foo", linkageName: "?foo@@YAXPEAUS@@_J@Z", scope: !9, file: !9, line: 7, type: !10, scopeLine: 7, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !19) -!9 = !DIFile(filename: ".\\salvage-gep.cpp", directory: "/") -!10 = !DISubroutineType(types: !11) -!11 = !{null, !12, !18} -!12 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !13, size: 64) -!13 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "S", file: !9, line: 2, size: 64, flags: DIFlagTypePassByValue, elements: !14, identifier: ".?AUS@@") -!14 = !{!15, !17} -!15 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !13, file: !9, line: 3, baseType: !16, size: 32) -!16 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!17 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !13, file: !9, line: 4, baseType: !16, size: 32, offset: 32) -!18 = !DIBasicType(name: "long long int", size: 64, encoding: DW_ATE_signed) -!19 = !{!20, !21, !22} -!20 = !DILocalVariable(name: "offset", arg: 2, scope: !8, file: !9, line: 7, type: !18) -!21 = !DILocalVariable(name: "ptr", arg: 1, scope: !8, file: !9, line: 7, type: !12) -!22 = !DILocalVariable(name: "offset_ptr", scope: !8, file: !9, line: 8, type: !23) -!23 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !16, size: 64) -!24 = !DILocation(line: 0, scope: !8) -!25 = !DILocation(line: 8, scope: !8) -!26 = !DILocation(line: 9, scope: !8) diff --git a/llvm/test/DebugInfo/salvage-nonconst-binop.ll b/llvm/test/DebugInfo/salvage-nonconst-binop.ll deleted file mode 100644 --- a/llvm/test/DebugInfo/salvage-nonconst-binop.ll +++ /dev/null @@ -1,45 +0,0 @@ -; RUN: opt %s -dce -S | FileCheck %s - -; Tests the salvaging of binary operators that use more than one non-constant -; SSA value. 
- -; CHECK: call void @llvm.dbg.value(metadata !DIArgList(i32 %a, i32 %b), -; CHECK-SAME: ![[VAR_C:[0-9]+]], -; CHECK-SAME: !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus, DW_OP_stack_value)) - -; CHECK: ![[VAR_C]] = !DILocalVariable(name: "c" - -define i32 @"?multiply@@YAHHH@Z"(i32 %a, i32 %b) !dbg !8 { -entry: - call void @llvm.dbg.value(metadata i32 %b, metadata !12, metadata !DIExpression()), !dbg !13 - call void @llvm.dbg.value(metadata i32 %a, metadata !14, metadata !DIExpression()), !dbg !13 - %add = add nsw i32 %a, %b, !dbg !15 - call void @llvm.dbg.value(metadata i32 %add, metadata !16, metadata !DIExpression()), !dbg !13 - %mul = mul nsw i32 %a, %b, !dbg !17 - ret i32 %mul, !dbg !17 -} - -declare void @llvm.dbg.value(metadata, metadata, metadata) - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!3, !4, !5, !6} -!llvm.ident = !{!7} - -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 11.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) -!1 = !DIFile(filename: "test.cpp", directory: "/") -!2 = !{} -!3 = !{i32 2, !"CodeView", i32 1} -!4 = !{i32 2, !"Debug Info Version", i32 3} -!5 = !{i32 1, !"wchar_size", i32 2} -!6 = !{i32 7, !"PIC Level", i32 2} -!7 = !{!"clang version 11.0.0"} -!8 = distinct !DISubprogram(name: "multiply", linkageName: "?multiply@@YAHHH@Z", scope: !1, file: !1, line: 1, type: !9, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) -!9 = !DISubroutineType(types: !10) -!10 = !{!11, !11, !11} -!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!12 = !DILocalVariable(name: "b", arg: 2, scope: !8, file: !1, line: 1, type: !11) -!13 = !DILocation(line: 0, scope: !8) -!14 = !DILocalVariable(name: "a", arg: 1, scope: !8, file: !1, line: 1, type: !11) -!15 = !DILocation(line: 2, scope: !8) -!16 = !DILocalVariable(name: "c", scope: !8, file: !1, line: 2, type: !11) -!17 = 
!DILocation(line: 3, scope: !8) diff --git a/llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll b/llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll --- a/llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll +++ b/llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -mtriple=aarch64-- -atomic-expand %s | FileCheck %s -; RUN: opt -S -mtriple=aarch64-- -mattr=+outline-atomics -atomic-expand %s | FileCheck %s --check-prefix=OUTLINE-ATOMICS +; RUN: opt -O1 -S -mtriple=aarch64-- -atomic-expand %s | FileCheck %s +; RUN: opt -O1 -S -mtriple=aarch64-- -mattr=+outline-atomics -atomic-expand %s | FileCheck %s --check-prefix=OUTLINE-ATOMICS define void @atomic_swap_f16(half* %ptr, half %val) nounwind { ; CHECK-LABEL: @atomic_swap_f16( diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll b/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll --- a/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll @@ -17,10 +17,10 @@ ; CHECK-NEXT: [[TMP36:%.*]] = icmp sgt i64 [[TMP35:%.*]], 0 ; CHECK-NEXT: br i1 [[TMP36]], label [[BB_TRUE:%.*]], label [[BB_FALSE:%.*]] ; CHECK: bb_true: -; CHECK-NEXT: tail call void @check1(i1 false) [[ATTR1:#.*]] +; CHECK-NEXT: tail call void @check1(i1 false) #[[ATTR1:[0-9]+]] ; CHECK-NEXT: unreachable ; CHECK: bb_false: -; CHECK-NEXT: tail call void @check2(i1 true) [[ATTR1]] +; CHECK-NEXT: tail call void @check2(i1 true) #[[ATTR1]] ; CHECK-NEXT: unreachable ; bb: @@ -56,7 +56,7 @@ ; CHECK-NEXT: tail call void @check1(i1 false) ; CHECK-NEXT: unreachable ; CHECK: bb_false: -; CHECK-NEXT: tail call void @check2(i1 true) [[ATTR1]] +; CHECK-NEXT: tail call void @check2(i1 true) #[[ATTR1]] ; CHECK-NEXT: unreachable ; bb: @@ -958,8 +958,8 @@ ret void } -define void @test_icmp_mask_two_values(i32 %a) { -; 
CHECK-LABEL: @test_icmp_mask_two_values( +define void @test_icmp_mask_eq_two_values(i32 %a) { +; CHECK-LABEL: @test_icmp_mask_eq_two_values( ; CHECK-NEXT: [[AND:%.*]] = and i32 [[A:%.*]], -2 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 10 ; CHECK-NEXT: br i1 [[CMP]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] @@ -991,8 +991,8 @@ ret void } -define void @test_icmp_mask_bit_set(i32 %a) { -; CHECK-LABEL: @test_icmp_mask_bit_set( +define void @test_icmp_mask_eq_bit_set(i32 %a) { +; CHECK-LABEL: @test_icmp_mask_eq_bit_set( ; CHECK-NEXT: [[AND:%.*]] = and i32 [[A:%.*]], 32 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 32 ; CHECK-NEXT: br i1 [[CMP]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] @@ -1019,8 +1019,8 @@ ret void } -define void @test_icmp_mask_bit_unset(i32 %a) { -; CHECK-LABEL: @test_icmp_mask_bit_unset( +define void @test_icmp_mask_eq_bit_unset(i32 %a) { +; CHECK-LABEL: @test_icmp_mask_eq_bit_unset( ; CHECK-NEXT: [[AND:%.*]] = and i32 [[A:%.*]], 32 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 ; CHECK-NEXT: br i1 [[CMP]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] @@ -1047,8 +1047,8 @@ ret void } -define void @test_icmp_mask_wrong_predicate(i32 %a) { -; CHECK-LABEL: @test_icmp_mask_wrong_predicate( +define void @test_icmp_mask_eq_wrong_predicate(i32 %a) { +; CHECK-LABEL: @test_icmp_mask_eq_wrong_predicate( ; CHECK-NEXT: [[AND:%.*]] = and i32 [[A:%.*]], -2 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 10 ; CHECK-NEXT: br i1 [[CMP]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] @@ -1084,4 +1084,93 @@ ret void } +define void @test_icmp_mask_ne(i32 %a) { +; CHECK-LABEL: @test_icmp_mask_ne( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[A:%.*]], 6 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK: if.true: +; CHECK-NEXT: call void @check1(i1 true) +; CHECK-NEXT: [[CMP3:%.*]] = icmp ugt i32 [[A]], 2 +; CHECK-NEXT: call void @check1(i1 [[CMP3]]) +; CHECK-NEXT: 
[[CMP4:%.*]] = icmp ult i32 [[A]], -1 +; CHECK-NEXT: call void @check1(i1 [[CMP4]]) +; CHECK-NEXT: ret void +; CHECK: if.false: +; CHECK-NEXT: ret void +; + %and = and i32 %a, 6 + %cmp = icmp ne i32 %and, 0 + br i1 %cmp, label %if.true, label %if.false + +if.true: + %cmp2 = icmp uge i32 %a, 2 + call void @check1(i1 %cmp2) + %cmp3 = icmp ugt i32 %a, 2 + call void @check1(i1 %cmp3) + %cmp4 = icmp ult i32 %a, -1 + call void @check1(i1 %cmp4) + ret void + +if.false: + ret void +} + +define void @test_icmp_mask_ne_nonzero_cmp(i32 %a) { +; CHECK-LABEL: @test_icmp_mask_ne_nonzero_cmp( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[A:%.*]], 6 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 6 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK: if.true: +; CHECK-NEXT: [[CMP2:%.*]] = icmp uge i32 [[A]], 2 +; CHECK-NEXT: call void @check1(i1 [[CMP2]]) +; CHECK-NEXT: [[CMP3:%.*]] = icmp ugt i32 [[A]], 2 +; CHECK-NEXT: call void @check1(i1 [[CMP3]]) +; CHECK-NEXT: [[CMP4:%.*]] = icmp ult i32 [[A]], -1 +; CHECK-NEXT: call void @check1(i1 [[CMP4]]) +; CHECK-NEXT: ret void +; CHECK: if.false: +; CHECK-NEXT: ret void +; + %and = and i32 %a, 6 + %cmp = icmp ne i32 %and, 6 + br i1 %cmp, label %if.true, label %if.false + +if.true: + %cmp2 = icmp uge i32 %a, 2 + call void @check1(i1 %cmp2) + %cmp3 = icmp ugt i32 %a, 2 + call void @check1(i1 %cmp3) + %cmp4 = icmp ult i32 %a, -1 + call void @check1(i1 %cmp4) + ret void + +if.false: + ret void +} + +define void @test_icmp_mask_ne_zero_mask(i32 %a) { +; CHECK-LABEL: @test_icmp_mask_ne_zero_mask( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[A:%.*]], 0 +; CHECK-NEXT: br i1 false, label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK: if.true: +; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i32 [[A]], 0 +; CHECK-NEXT: call void @check1(i1 [[CMP2]]) +; CHECK-NEXT: ret void +; CHECK: if.false: +; CHECK-NEXT: ret void +; + %and = and i32 %a, 0 + %cmp = icmp ne i32 %and, 0 + br i1 %cmp, label %if.true, label %if.false + +if.true: + 
%cmp2 = icmp ne i32 %a, 0 + call void @check1(i1 %cmp2) + ret void + +if.false: + ret void +} + attributes #4 = { noreturn } diff --git a/llvm/test/Transforms/InstCombine/debuginfo-sink.ll b/llvm/test/Transforms/InstCombine/debuginfo-sink.ll --- a/llvm/test/Transforms/InstCombine/debuginfo-sink.ll +++ b/llvm/test/Transforms/InstCombine/debuginfo-sink.ll @@ -33,25 +33,23 @@ ; value range. ; CHECK-LABEL: define i32 @bar( -; CHECK: call void @llvm.dbg.value(metadata <vscale x 4 x i32>* undef, +; CHECK: call void @llvm.dbg.value(metadata i32* undef, ; CHECK-NEXT: br label %sink2 -define i32 @bar(<vscale x 4 x i32>* %a, i32 %b) !dbg !70 { +define i32 @bar(i32 *%a, i32 %b) !dbg !70 { entry: - %gep = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %a, i32 %b - call void @llvm.dbg.value(metadata <vscale x 4 x i32>* %gep, metadata !73, metadata !12), !dbg !74 + %gep = getelementptr i32, i32 *%a, i32 %b + call void @llvm.dbg.value(metadata i32* %gep, metadata !73, metadata !12), !dbg !74 br label %sink2 sink2: ; CHECK-LABEL: sink2: -; CHECK: call void @llvm.dbg.value(metadata <vscale x 4 x i32>* %gep, +; CHECK: call void @llvm.dbg.value(metadata i32* %gep, ; CHECK-SAME: metadata !{{[0-9]+}}, metadata !DIExpression()) ; CHECK-NEXT: load -; CHECK-NEXT: extractelement ; CHECK-NEXT: ret - %0 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %gep - %extract = extractelement <vscale x 4 x i32> %0, i32 1 - ret i32 %extract + %0 = load i32, i32* %gep + ret i32 %0 } ; This GEP is sunk, and has multiple debug uses in the same block. Check that diff --git a/llvm/test/Transforms/InstCombine/eq-of-parts.ll b/llvm/test/Transforms/InstCombine/eq-of-parts.ll --- a/llvm/test/Transforms/InstCombine/eq-of-parts.ll +++ b/llvm/test/Transforms/InstCombine/eq-of-parts.ll @@ -4,6 +4,10 @@ ; Combine equality comparisons of adjacent extracted integers parts into ; a comparison of a larger part. Start with some examples...
+declare void @use.i32(i32) +declare void @use.i8(i8) +declare void @use.i1(i1) + define i1 @eq_10(i32 %x, i32 %y) { ; CHECK-LABEL: @eq_10( ; CHECK-NEXT: [[X_0:%.*]] = trunc i32 [[X:%.*]] to i8 @@ -298,10 +302,6 @@ ; Test variants with extra uses. -declare void @use.i32(i32) -declare void @use.i8(i8) -declare void @use.i1(i1) - define i1 @eq_21_extra_use_lshr(i32 %x, i32 %y) { ; CHECK-LABEL: @eq_21_extra_use_lshr( ; CHECK-NEXT: [[X_321:%.*]] = lshr i32 [[X:%.*]], 8 @@ -659,3 +659,775 @@ %c.210 = and i1 %c.2, %c.1 ret i1 %c.210 } + +define i1 @eq_21_wrong_pred1(i32 %x, i32 %y) { +; CHECK-LABEL: @eq_21_wrong_pred1( +; CHECK-NEXT: [[X_321:%.*]] = lshr i32 [[X:%.*]], 8 +; CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[X_321]] to i8 +; CHECK-NEXT: [[X_32:%.*]] = lshr i32 [[X]], 16 +; CHECK-NEXT: [[X_2:%.*]] = trunc i32 [[X_32]] to i8 +; CHECK-NEXT: [[Y_321:%.*]] = lshr i32 [[Y:%.*]], 8 +; CHECK-NEXT: [[Y_1:%.*]] = trunc i32 [[Y_321]] to i8 +; CHECK-NEXT: [[Y_32:%.*]] = lshr i32 [[Y]], 16 +; CHECK-NEXT: [[Y_2:%.*]] = trunc i32 [[Y_32]] to i8 +; CHECK-NEXT: [[C_1:%.*]] = icmp eq i8 [[X_1]], [[Y_1]] +; CHECK-NEXT: [[C_2:%.*]] = icmp ne i8 [[X_2]], [[Y_2]] +; CHECK-NEXT: [[C_210:%.*]] = and i1 [[C_2]], [[C_1]] +; CHECK-NEXT: ret i1 [[C_210]] +; + %x.321 = lshr i32 %x, 8 + %x.1 = trunc i32 %x.321 to i8 + %x.32 = lshr i32 %x, 16 + %x.2 = trunc i32 %x.32 to i8 + %y.321 = lshr i32 %y, 8 + %y.1 = trunc i32 %y.321 to i8 + %y.32 = lshr i32 %y, 16 + %y.2 = trunc i32 %y.32 to i8 + %c.1 = icmp eq i8 %x.1, %y.1 + %c.2 = icmp ne i8 %x.2, %y.2 + %c.210 = and i1 %c.2, %c.1 + ret i1 %c.210 +} + +define i1 @eq_21_wrong_pred2(i32 %x, i32 %y) { +; CHECK-LABEL: @eq_21_wrong_pred2( +; CHECK-NEXT: [[X_321:%.*]] = lshr i32 [[X:%.*]], 8 +; CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[X_321]] to i8 +; CHECK-NEXT: [[X_32:%.*]] = lshr i32 [[X]], 16 +; CHECK-NEXT: [[X_2:%.*]] = trunc i32 [[X_32]] to i8 +; CHECK-NEXT: [[Y_321:%.*]] = lshr i32 [[Y:%.*]], 8 +; CHECK-NEXT: [[Y_1:%.*]] = trunc i32 [[Y_321]] to i8 +; 
CHECK-NEXT: [[Y_32:%.*]] = lshr i32 [[Y]], 16 +; CHECK-NEXT: [[Y_2:%.*]] = trunc i32 [[Y_32]] to i8 +; CHECK-NEXT: [[C_1:%.*]] = icmp ne i8 [[X_1]], [[Y_1]] +; CHECK-NEXT: [[C_2:%.*]] = icmp ne i8 [[X_2]], [[Y_2]] +; CHECK-NEXT: [[C_210:%.*]] = and i1 [[C_2]], [[C_1]] +; CHECK-NEXT: ret i1 [[C_210]] +; + %x.321 = lshr i32 %x, 8 + %x.1 = trunc i32 %x.321 to i8 + %x.32 = lshr i32 %x, 16 + %x.2 = trunc i32 %x.32 to i8 + %y.321 = lshr i32 %y, 8 + %y.1 = trunc i32 %y.321 to i8 + %y.32 = lshr i32 %y, 16 + %y.2 = trunc i32 %y.32 to i8 + %c.1 = icmp ne i8 %x.1, %y.1 + %c.2 = icmp ne i8 %x.2, %y.2 + %c.210 = and i1 %c.2, %c.1 + ret i1 %c.210 +} + +; +; Now the same thing again, but for or ne instead of and eq. +; + +define i1 @ne_10(i32 %x, i32 %y) { +; CHECK-LABEL: @ne_10( +; CHECK-NEXT: [[X_0:%.*]] = trunc i32 [[X:%.*]] to i8 +; CHECK-NEXT: [[X_321:%.*]] = lshr i32 [[X]], 8 +; CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[X_321]] to i8 +; CHECK-NEXT: [[Y_0:%.*]] = trunc i32 [[Y:%.*]] to i8 +; CHECK-NEXT: [[Y_321:%.*]] = lshr i32 [[Y]], 8 +; CHECK-NEXT: [[Y_1:%.*]] = trunc i32 [[Y_321]] to i8 +; CHECK-NEXT: [[C_0:%.*]] = icmp ne i8 [[X_0]], [[Y_0]] +; CHECK-NEXT: [[C_1:%.*]] = icmp ne i8 [[X_1]], [[Y_1]] +; CHECK-NEXT: [[C_10:%.*]] = or i1 [[C_0]], [[C_1]] +; CHECK-NEXT: ret i1 [[C_10]] +; + %x.0 = trunc i32 %x to i8 + %x.321 = lshr i32 %x, 8 + %x.1 = trunc i32 %x.321 to i8 + %y.0 = trunc i32 %y to i8 + %y.321 = lshr i32 %y, 8 + %y.1 = trunc i32 %y.321 to i8 + %c.0 = icmp ne i8 %x.0, %y.0 + %c.1 = icmp ne i8 %x.1, %y.1 + %c.10 = or i1 %c.0, %c.1 + ret i1 %c.10 +} + +define i1 @ne_210(i32 %x, i32 %y) { +; CHECK-LABEL: @ne_210( +; CHECK-NEXT: [[X_0:%.*]] = trunc i32 [[X:%.*]] to i8 +; CHECK-NEXT: [[X_321:%.*]] = lshr i32 [[X]], 8 +; CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[X_321]] to i8 +; CHECK-NEXT: [[X_32:%.*]] = lshr i32 [[X]], 16 +; CHECK-NEXT: [[X_2:%.*]] = trunc i32 [[X_32]] to i8 +; CHECK-NEXT: [[Y_0:%.*]] = trunc i32 [[Y:%.*]] to i8 +; CHECK-NEXT: [[Y_321:%.*]] = lshr i32 
[[Y]], 8 +; CHECK-NEXT: [[Y_1:%.*]] = trunc i32 [[Y_321]] to i8 +; CHECK-NEXT: [[Y_32:%.*]] = lshr i32 [[Y]], 16 +; CHECK-NEXT: [[Y_2:%.*]] = trunc i32 [[Y_32]] to i8 +; CHECK-NEXT: [[C_0:%.*]] = icmp ne i8 [[X_0]], [[Y_0]] +; CHECK-NEXT: [[C_1:%.*]] = icmp ne i8 [[X_1]], [[Y_1]] +; CHECK-NEXT: [[C_2:%.*]] = icmp ne i8 [[X_2]], [[Y_2]] +; CHECK-NEXT: [[C_10:%.*]] = or i1 [[C_0]], [[C_1]] +; CHECK-NEXT: [[C_210:%.*]] = or i1 [[C_2]], [[C_10]] +; CHECK-NEXT: ret i1 [[C_210]] +; + %x.0 = trunc i32 %x to i8 + %x.321 = lshr i32 %x, 8 + %x.1 = trunc i32 %x.321 to i8 + %x.32 = lshr i32 %x, 16 + %x.2 = trunc i32 %x.32 to i8 + %y.0 = trunc i32 %y to i8 + %y.321 = lshr i32 %y, 8 + %y.1 = trunc i32 %y.321 to i8 + %y.32 = lshr i32 %y, 16 + %y.2 = trunc i32 %y.32 to i8 + %c.0 = icmp ne i8 %x.0, %y.0 + %c.1 = icmp ne i8 %x.1, %y.1 + %c.2 = icmp ne i8 %x.2, %y.2 + %c.10 = or i1 %c.0, %c.1 + %c.210 = or i1 %c.2, %c.10 + ret i1 %c.210 +} + +define i1 @ne_3210(i32 %x, i32 %y) { +; CHECK-LABEL: @ne_3210( +; CHECK-NEXT: [[X_0:%.*]] = trunc i32 [[X:%.*]] to i8 +; CHECK-NEXT: [[X_321:%.*]] = lshr i32 [[X]], 8 +; CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[X_321]] to i8 +; CHECK-NEXT: [[X_32:%.*]] = lshr i32 [[X]], 16 +; CHECK-NEXT: [[X_2:%.*]] = trunc i32 [[X_32]] to i8 +; CHECK-NEXT: [[X_3_EXT:%.*]] = lshr i32 [[X]], 24 +; CHECK-NEXT: [[X_3:%.*]] = trunc i32 [[X_3_EXT]] to i8 +; CHECK-NEXT: [[Y_0:%.*]] = trunc i32 [[Y:%.*]] to i8 +; CHECK-NEXT: [[Y_321:%.*]] = lshr i32 [[Y]], 8 +; CHECK-NEXT: [[Y_1:%.*]] = trunc i32 [[Y_321]] to i8 +; CHECK-NEXT: [[Y_32:%.*]] = lshr i32 [[Y]], 16 +; CHECK-NEXT: [[Y_2:%.*]] = trunc i32 [[Y_32]] to i8 +; CHECK-NEXT: [[Y_3_EXT:%.*]] = lshr i32 [[Y]], 24 +; CHECK-NEXT: [[Y_3:%.*]] = trunc i32 [[Y_3_EXT]] to i8 +; CHECK-NEXT: [[C_0:%.*]] = icmp ne i8 [[X_0]], [[Y_0]] +; CHECK-NEXT: [[C_1:%.*]] = icmp ne i8 [[X_1]], [[Y_1]] +; CHECK-NEXT: [[C_2:%.*]] = icmp ne i8 [[X_2]], [[Y_2]] +; CHECK-NEXT: [[C_3:%.*]] = icmp ne i8 [[X_3]], [[Y_3]] +; CHECK-NEXT: [[C_10:%.*]] 
= or i1 [[C_0]], [[C_1]] +; CHECK-NEXT: [[C_210:%.*]] = or i1 [[C_2]], [[C_10]] +; CHECK-NEXT: [[C_3210:%.*]] = or i1 [[C_3]], [[C_210]] +; CHECK-NEXT: ret i1 [[C_3210]] +; + %x.0 = trunc i32 %x to i8 + %x.321 = lshr i32 %x, 8 + %x.1 = trunc i32 %x.321 to i8 + %x.32 = lshr i32 %x, 16 + %x.2 = trunc i32 %x.32 to i8 + %x.3.ext = lshr i32 %x, 24 + %x.3 = trunc i32 %x.3.ext to i8 + %y.0 = trunc i32 %y to i8 + %y.321 = lshr i32 %y, 8 + %y.1 = trunc i32 %y.321 to i8 + %y.32 = lshr i32 %y, 16 + %y.2 = trunc i32 %y.32 to i8 + %y.3.ext = lshr i32 %y, 24 + %y.3 = trunc i32 %y.3.ext to i8 + %c.0 = icmp ne i8 %x.0, %y.0 + %c.1 = icmp ne i8 %x.1, %y.1 + %c.2 = icmp ne i8 %x.2, %y.2 + %c.3 = icmp ne i8 %x.3, %y.3 + %c.10 = or i1 %c.0, %c.1 + %c.210 = or i1 %c.2, %c.10 + %c.3210 = or i1 %c.3, %c.210 + ret i1 %c.3210 +} + +define i1 @ne_21(i32 %x, i32 %y) { +; CHECK-LABEL: @ne_21( +; CHECK-NEXT: [[X_321:%.*]] = lshr i32 [[X:%.*]], 8 +; CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[X_321]] to i8 +; CHECK-NEXT: [[X_32:%.*]] = lshr i32 [[X]], 16 +; CHECK-NEXT: [[X_2:%.*]] = trunc i32 [[X_32]] to i8 +; CHECK-NEXT: [[Y_321:%.*]] = lshr i32 [[Y:%.*]], 8 +; CHECK-NEXT: [[Y_1:%.*]] = trunc i32 [[Y_321]] to i8 +; CHECK-NEXT: [[Y_32:%.*]] = lshr i32 [[Y]], 16 +; CHECK-NEXT: [[Y_2:%.*]] = trunc i32 [[Y_32]] to i8 +; CHECK-NEXT: [[C_1:%.*]] = icmp ne i8 [[X_1]], [[Y_1]] +; CHECK-NEXT: [[C_2:%.*]] = icmp ne i8 [[X_2]], [[Y_2]] +; CHECK-NEXT: [[C_210:%.*]] = or i1 [[C_2]], [[C_1]] +; CHECK-NEXT: ret i1 [[C_210]] +; + %x.321 = lshr i32 %x, 8 + %x.1 = trunc i32 %x.321 to i8 + %x.32 = lshr i32 %x, 16 + %x.2 = trunc i32 %x.32 to i8 + %y.321 = lshr i32 %y, 8 + %y.1 = trunc i32 %y.321 to i8 + %y.32 = lshr i32 %y, 16 + %y.2 = trunc i32 %y.32 to i8 + %c.1 = icmp ne i8 %x.1, %y.1 + %c.2 = icmp ne i8 %x.2, %y.2 + %c.210 = or i1 %c.2, %c.1 + ret i1 %c.210 +} + +; Test commuted variants of ne_21. 
+ +define i1 @ne_21_comm_or(i32 %x, i32 %y) { +; CHECK-LABEL: @ne_21_comm_or( +; CHECK-NEXT: [[X_321:%.*]] = lshr i32 [[X:%.*]], 8 +; CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[X_321]] to i8 +; CHECK-NEXT: [[X_32:%.*]] = lshr i32 [[X]], 16 +; CHECK-NEXT: [[X_2:%.*]] = trunc i32 [[X_32]] to i8 +; CHECK-NEXT: [[Y_321:%.*]] = lshr i32 [[Y:%.*]], 8 +; CHECK-NEXT: [[Y_1:%.*]] = trunc i32 [[Y_321]] to i8 +; CHECK-NEXT: [[Y_32:%.*]] = lshr i32 [[Y]], 16 +; CHECK-NEXT: [[Y_2:%.*]] = trunc i32 [[Y_32]] to i8 +; CHECK-NEXT: [[C_1:%.*]] = icmp ne i8 [[X_1]], [[Y_1]] +; CHECK-NEXT: [[C_2:%.*]] = icmp ne i8 [[X_2]], [[Y_2]] +; CHECK-NEXT: [[C_210:%.*]] = or i1 [[C_1]], [[C_2]] +; CHECK-NEXT: ret i1 [[C_210]] +; + %x.321 = lshr i32 %x, 8 + %x.1 = trunc i32 %x.321 to i8 + %x.32 = lshr i32 %x, 16 + %x.2 = trunc i32 %x.32 to i8 + %y.321 = lshr i32 %y, 8 + %y.1 = trunc i32 %y.321 to i8 + %y.32 = lshr i32 %y, 16 + %y.2 = trunc i32 %y.32 to i8 + %c.1 = icmp ne i8 %x.1, %y.1 + %c.2 = icmp ne i8 %x.2, %y.2 + %c.210 = or i1 %c.1, %c.2 + ret i1 %c.210 +} + +define i1 @ne_21_comm_ne(i32 %x, i32 %y) { +; CHECK-LABEL: @ne_21_comm_ne( +; CHECK-NEXT: [[X_321:%.*]] = lshr i32 [[X:%.*]], 8 +; CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[X_321]] to i8 +; CHECK-NEXT: [[X_32:%.*]] = lshr i32 [[X]], 16 +; CHECK-NEXT: [[X_2:%.*]] = trunc i32 [[X_32]] to i8 +; CHECK-NEXT: [[Y_321:%.*]] = lshr i32 [[Y:%.*]], 8 +; CHECK-NEXT: [[Y_1:%.*]] = trunc i32 [[Y_321]] to i8 +; CHECK-NEXT: [[Y_32:%.*]] = lshr i32 [[Y]], 16 +; CHECK-NEXT: [[Y_2:%.*]] = trunc i32 [[Y_32]] to i8 +; CHECK-NEXT: [[C_1:%.*]] = icmp ne i8 [[X_1]], [[Y_1]] +; CHECK-NEXT: [[C_2:%.*]] = icmp ne i8 [[Y_2]], [[X_2]] +; CHECK-NEXT: [[C_210:%.*]] = or i1 [[C_2]], [[C_1]] +; CHECK-NEXT: ret i1 [[C_210]] +; + %x.321 = lshr i32 %x, 8 + %x.1 = trunc i32 %x.321 to i8 + %x.32 = lshr i32 %x, 16 + %x.2 = trunc i32 %x.32 to i8 + %y.321 = lshr i32 %y, 8 + %y.1 = trunc i32 %y.321 to i8 + %y.32 = lshr i32 %y, 16 + %y.2 = trunc i32 %y.32 to i8 + %c.1 = icmp ne i8 
%x.1, %y.1 + %c.2 = icmp ne i8 %y.2, %x.2 + %c.210 = or i1 %c.2, %c.1 + ret i1 %c.210 +} + +define i1 @ne_21_comm_ne2(i32 %x, i32 %y) { +; CHECK-LABEL: @ne_21_comm_ne2( +; CHECK-NEXT: [[X_321:%.*]] = lshr i32 [[X:%.*]], 8 +; CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[X_321]] to i8 +; CHECK-NEXT: [[X_32:%.*]] = lshr i32 [[X]], 16 +; CHECK-NEXT: [[X_2:%.*]] = trunc i32 [[X_32]] to i8 +; CHECK-NEXT: [[Y_321:%.*]] = lshr i32 [[Y:%.*]], 8 +; CHECK-NEXT: [[Y_1:%.*]] = trunc i32 [[Y_321]] to i8 +; CHECK-NEXT: [[Y_32:%.*]] = lshr i32 [[Y]], 16 +; CHECK-NEXT: [[Y_2:%.*]] = trunc i32 [[Y_32]] to i8 +; CHECK-NEXT: [[C_1:%.*]] = icmp ne i8 [[Y_1]], [[X_1]] +; CHECK-NEXT: [[C_2:%.*]] = icmp ne i8 [[X_2]], [[Y_2]] +; CHECK-NEXT: [[C_210:%.*]] = or i1 [[C_2]], [[C_1]] +; CHECK-NEXT: ret i1 [[C_210]] +; + %x.321 = lshr i32 %x, 8 + %x.1 = trunc i32 %x.321 to i8 + %x.32 = lshr i32 %x, 16 + %x.2 = trunc i32 %x.32 to i8 + %y.321 = lshr i32 %y, 8 + %y.1 = trunc i32 %y.321 to i8 + %y.32 = lshr i32 %y, 16 + %y.2 = trunc i32 %y.32 to i8 + %c.1 = icmp ne i8 %y.1, %x.1 + %c.2 = icmp ne i8 %x.2, %y.2 + %c.210 = or i1 %c.2, %c.1 + ret i1 %c.210 +} + +; Test vector variant. 
+ +define <2x i1> @ne_21_vector(<2x i32> %x, <2x i32> %y) { +; CHECK-LABEL: @ne_21_vector( +; CHECK-NEXT: [[X_321:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 8, i32 8> +; CHECK-NEXT: [[X_1:%.*]] = trunc <2 x i32> [[X_321]] to <2 x i8> +; CHECK-NEXT: [[X_32:%.*]] = lshr <2 x i32> [[X]], <i32 16, i32 16> +; CHECK-NEXT: [[X_2:%.*]] = trunc <2 x i32> [[X_32]] to <2 x i8> +; CHECK-NEXT: [[Y_321:%.*]] = lshr <2 x i32> [[Y:%.*]], <i32 8, i32 8> +; CHECK-NEXT: [[Y_1:%.*]] = trunc <2 x i32> [[Y_321]] to <2 x i8> +; CHECK-NEXT: [[Y_32:%.*]] = lshr <2 x i32> [[Y]], <i32 16, i32 16> +; CHECK-NEXT: [[Y_2:%.*]] = trunc <2 x i32> [[Y_32]] to <2 x i8> +; CHECK-NEXT: [[C_1:%.*]] = icmp ne <2 x i8> [[X_1]], [[Y_1]] +; CHECK-NEXT: [[C_2:%.*]] = icmp ne <2 x i8> [[X_2]], [[Y_2]] +; CHECK-NEXT: [[C_210:%.*]] = or <2 x i1> [[C_2]], [[C_1]] +; CHECK-NEXT: ret <2 x i1> [[C_210]] +; + %x.321 = lshr <2x i32> %x, <i32 8, i32 8> + %x.1 = trunc <2x i32> %x.321 to <2x i8> + %x.32 = lshr <2x i32> %x, <i32 16, i32 16> + %x.2 = trunc <2x i32> %x.32 to <2x i8> + %y.321 = lshr <2x i32> %y, <i32 8, i32 8> + %y.1 = trunc <2x i32> %y.321 to <2x i8> + %y.32 = lshr <2x i32> %y, <i32 16, i32 16> + %y.2 = trunc <2x i32> %y.32 to <2x i8> + %c.1 = icmp ne <2x i8> %x.1, %y.1 + %c.2 = icmp ne <2x i8> %x.2, %y.2 + %c.210 = or <2x i1> %c.2, %c.1 + ret <2 x i1> %c.210 +} + +; Test irregular bit widths. This also tests the case where +; all the involved bit widths or offsets are different.
+ +define i1 @ne_irregular_bit_widths(i31 %x, i31 %y) { +; CHECK-LABEL: @ne_irregular_bit_widths( +; CHECK-NEXT: [[X_321:%.*]] = lshr i31 [[X:%.*]], 7 +; CHECK-NEXT: [[X_1:%.*]] = trunc i31 [[X_321]] to i6 +; CHECK-NEXT: [[X_32:%.*]] = lshr i31 [[X]], 13 +; CHECK-NEXT: [[X_2:%.*]] = trunc i31 [[X_32]] to i5 +; CHECK-NEXT: [[Y_321:%.*]] = lshr i31 [[Y:%.*]], 7 +; CHECK-NEXT: [[Y_1:%.*]] = trunc i31 [[Y_321]] to i6 +; CHECK-NEXT: [[Y_32:%.*]] = lshr i31 [[Y]], 13 +; CHECK-NEXT: [[Y_2:%.*]] = trunc i31 [[Y_32]] to i5 +; CHECK-NEXT: [[C_1:%.*]] = icmp ne i6 [[X_1]], [[Y_1]] +; CHECK-NEXT: [[C_2:%.*]] = icmp ne i5 [[X_2]], [[Y_2]] +; CHECK-NEXT: [[C_210:%.*]] = or i1 [[C_2]], [[C_1]] +; CHECK-NEXT: ret i1 [[C_210]] +; + %x.321 = lshr i31 %x, 7 + %x.1 = trunc i31 %x.321 to i6 + %x.32 = lshr i31 %x, 13 + %x.2 = trunc i31 %x.32 to i5 + %y.321 = lshr i31 %y, 7 + %y.1 = trunc i31 %y.321 to i6 + %y.32 = lshr i31 %y, 13 + %y.2 = trunc i31 %y.32 to i5 + %c.1 = icmp ne i6 %x.1, %y.1 + %c.2 = icmp ne i5 %x.2, %y.2 + %c.210 = or i1 %c.2, %c.1 + ret i1 %c.210 +} + +; Test variants with extra uses. 
+ +define i1 @ne_21_extra_use_lshr(i32 %x, i32 %y) { +; CHECK-LABEL: @ne_21_extra_use_lshr( +; CHECK-NEXT: [[X_321:%.*]] = lshr i32 [[X:%.*]], 8 +; CHECK-NEXT: call void @use.i32(i32 [[X_321]]) +; CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[X_321]] to i8 +; CHECK-NEXT: [[X_32:%.*]] = lshr i32 [[X]], 16 +; CHECK-NEXT: [[X_2:%.*]] = trunc i32 [[X_32]] to i8 +; CHECK-NEXT: [[Y_321:%.*]] = lshr i32 [[Y:%.*]], 8 +; CHECK-NEXT: [[Y_1:%.*]] = trunc i32 [[Y_321]] to i8 +; CHECK-NEXT: [[Y_32:%.*]] = lshr i32 [[Y]], 16 +; CHECK-NEXT: [[Y_2:%.*]] = trunc i32 [[Y_32]] to i8 +; CHECK-NEXT: [[C_1:%.*]] = icmp ne i8 [[X_1]], [[Y_1]] +; CHECK-NEXT: [[C_2:%.*]] = icmp ne i8 [[X_2]], [[Y_2]] +; CHECK-NEXT: [[C_210:%.*]] = or i1 [[C_1]], [[C_2]] +; CHECK-NEXT: ret i1 [[C_210]] +; + %x.321 = lshr i32 %x, 8 + call void @use.i32(i32 %x.321) + %x.1 = trunc i32 %x.321 to i8 + %x.32 = lshr i32 %x, 16 + %x.2 = trunc i32 %x.32 to i8 + %y.321 = lshr i32 %y, 8 + %y.1 = trunc i32 %y.321 to i8 + %y.32 = lshr i32 %y, 16 + %y.2 = trunc i32 %y.32 to i8 + %c.1 = icmp ne i8 %x.1, %y.1 + %c.2 = icmp ne i8 %x.2, %y.2 + %c.210 = or i1 %c.1, %c.2 + ret i1 %c.210 +} + +define i1 @ne_21_extra_use_trunc(i32 %x, i32 %y) { +; CHECK-LABEL: @ne_21_extra_use_trunc( +; CHECK-NEXT: [[X_321:%.*]] = lshr i32 [[X:%.*]], 8 +; CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[X_321]] to i8 +; CHECK-NEXT: call void @use.i8(i8 [[X_1]]) +; CHECK-NEXT: [[X_32:%.*]] = lshr i32 [[X]], 16 +; CHECK-NEXT: [[X_2:%.*]] = trunc i32 [[X_32]] to i8 +; CHECK-NEXT: [[Y_321:%.*]] = lshr i32 [[Y:%.*]], 8 +; CHECK-NEXT: [[Y_1:%.*]] = trunc i32 [[Y_321]] to i8 +; CHECK-NEXT: [[Y_32:%.*]] = lshr i32 [[Y]], 16 +; CHECK-NEXT: [[Y_2:%.*]] = trunc i32 [[Y_32]] to i8 +; CHECK-NEXT: [[C_1:%.*]] = icmp ne i8 [[X_1]], [[Y_1]] +; CHECK-NEXT: [[C_2:%.*]] = icmp ne i8 [[X_2]], [[Y_2]] +; CHECK-NEXT: [[C_210:%.*]] = or i1 [[C_1]], [[C_2]] +; CHECK-NEXT: ret i1 [[C_210]] +; + %x.321 = lshr i32 %x, 8 + %x.1 = trunc i32 %x.321 to i8 + call void @use.i8(i8 %x.1) + %x.32 = 
lshr i32 %x, 16 + %x.2 = trunc i32 %x.32 to i8 + %y.321 = lshr i32 %y, 8 + %y.1 = trunc i32 %y.321 to i8 + %y.32 = lshr i32 %y, 16 + %y.2 = trunc i32 %y.32 to i8 + %c.1 = icmp ne i8 %x.1, %y.1 + %c.2 = icmp ne i8 %x.2, %y.2 + %c.210 = or i1 %c.1, %c.2 + ret i1 %c.210 +} + +define i1 @ne_21_extra_use_ne1(i32 %x, i32 %y) { +; CHECK-LABEL: @ne_21_extra_use_ne1( +; CHECK-NEXT: [[X_321:%.*]] = lshr i32 [[X:%.*]], 8 +; CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[X_321]] to i8 +; CHECK-NEXT: [[X_32:%.*]] = lshr i32 [[X]], 16 +; CHECK-NEXT: [[X_2:%.*]] = trunc i32 [[X_32]] to i8 +; CHECK-NEXT: [[Y_321:%.*]] = lshr i32 [[Y:%.*]], 8 +; CHECK-NEXT: [[Y_1:%.*]] = trunc i32 [[Y_321]] to i8 +; CHECK-NEXT: [[Y_32:%.*]] = lshr i32 [[Y]], 16 +; CHECK-NEXT: [[Y_2:%.*]] = trunc i32 [[Y_32]] to i8 +; CHECK-NEXT: [[C_1:%.*]] = icmp ne i8 [[X_1]], [[Y_1]] +; CHECK-NEXT: call void @use.i1(i1 [[C_1]]) +; CHECK-NEXT: [[C_2:%.*]] = icmp ne i8 [[X_2]], [[Y_2]] +; CHECK-NEXT: [[C_210:%.*]] = or i1 [[C_1]], [[C_2]] +; CHECK-NEXT: ret i1 [[C_210]] +; + %x.321 = lshr i32 %x, 8 + %x.1 = trunc i32 %x.321 to i8 + %x.32 = lshr i32 %x, 16 + %x.2 = trunc i32 %x.32 to i8 + %y.321 = lshr i32 %y, 8 + %y.1 = trunc i32 %y.321 to i8 + %y.32 = lshr i32 %y, 16 + %y.2 = trunc i32 %y.32 to i8 + %c.1 = icmp ne i8 %x.1, %y.1 + call void @use.i1(i1 %c.1) + %c.2 = icmp ne i8 %x.2, %y.2 + %c.210 = or i1 %c.1, %c.2 + ret i1 %c.210 +} + +define i1 @ne_21_extra_use_ne2(i32 %x, i32 %y) { +; CHECK-LABEL: @ne_21_extra_use_ne2( +; CHECK-NEXT: [[X_321:%.*]] = lshr i32 [[X:%.*]], 8 +; CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[X_321]] to i8 +; CHECK-NEXT: [[X_32:%.*]] = lshr i32 [[X]], 16 +; CHECK-NEXT: [[X_2:%.*]] = trunc i32 [[X_32]] to i8 +; CHECK-NEXT: [[Y_321:%.*]] = lshr i32 [[Y:%.*]], 8 +; CHECK-NEXT: [[Y_1:%.*]] = trunc i32 [[Y_321]] to i8 +; CHECK-NEXT: [[Y_32:%.*]] = lshr i32 [[Y]], 16 +; CHECK-NEXT: [[Y_2:%.*]] = trunc i32 [[Y_32]] to i8 +; CHECK-NEXT: [[C_1:%.*]] = icmp ne i8 [[X_1]], [[Y_1]] +; CHECK-NEXT: [[C_2:%.*]] = 
icmp ne i8 [[X_2]], [[Y_2]] +; CHECK-NEXT: call void @use.i1(i1 [[C_2]]) +; CHECK-NEXT: [[C_210:%.*]] = or i1 [[C_1]], [[C_2]] +; CHECK-NEXT: ret i1 [[C_210]] +; + %x.321 = lshr i32 %x, 8 + %x.1 = trunc i32 %x.321 to i8 + %x.32 = lshr i32 %x, 16 + %x.2 = trunc i32 %x.32 to i8 + %y.321 = lshr i32 %y, 8 + %y.1 = trunc i32 %y.321 to i8 + %y.32 = lshr i32 %y, 16 + %y.2 = trunc i32 %y.32 to i8 + %c.1 = icmp ne i8 %x.1, %y.1 + %c.2 = icmp ne i8 %x.2, %y.2 + call void @use.i1(i1 %c.2) + %c.210 = or i1 %c.1, %c.2 + ret i1 %c.210 +} + +; Negative tests. + +define i1 @ne_21_wrong_op1(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: @ne_21_wrong_op1( +; CHECK-NEXT: [[X_321:%.*]] = lshr i32 [[Z:%.*]], 8 +; CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[X_321]] to i8 +; CHECK-NEXT: [[X_32:%.*]] = lshr i32 [[X:%.*]], 16 +; CHECK-NEXT: [[X_2:%.*]] = trunc i32 [[X_32]] to i8 +; CHECK-NEXT: [[Y_321:%.*]] = lshr i32 [[Y:%.*]], 8 +; CHECK-NEXT: [[Y_1:%.*]] = trunc i32 [[Y_321]] to i8 +; CHECK-NEXT: [[Y_32:%.*]] = lshr i32 [[Y]], 16 +; CHECK-NEXT: [[Y_2:%.*]] = trunc i32 [[Y_32]] to i8 +; CHECK-NEXT: [[C_1:%.*]] = icmp ne i8 [[X_1]], [[Y_1]] +; CHECK-NEXT: [[C_2:%.*]] = icmp ne i8 [[X_2]], [[Y_2]] +; CHECK-NEXT: [[C_210:%.*]] = or i1 [[C_2]], [[C_1]] +; CHECK-NEXT: ret i1 [[C_210]] +; + %x.321 = lshr i32 %z, 8 + %x.1 = trunc i32 %x.321 to i8 + %x.32 = lshr i32 %x, 16 + %x.2 = trunc i32 %x.32 to i8 + %y.321 = lshr i32 %y, 8 + %y.1 = trunc i32 %y.321 to i8 + %y.32 = lshr i32 %y, 16 + %y.2 = trunc i32 %y.32 to i8 + %c.1 = icmp ne i8 %x.1, %y.1 + %c.2 = icmp ne i8 %x.2, %y.2 + %c.210 = or i1 %c.2, %c.1 + ret i1 %c.210 +} + +define i1 @ne_21_wrong_op2(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: @ne_21_wrong_op2( +; CHECK-NEXT: [[X_321:%.*]] = lshr i32 [[X:%.*]], 8 +; CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[X_321]] to i8 +; CHECK-NEXT: [[X_32:%.*]] = lshr i32 [[Z:%.*]], 16 +; CHECK-NEXT: [[X_2:%.*]] = trunc i32 [[X_32]] to i8 +; CHECK-NEXT: [[Y_321:%.*]] = lshr i32 [[Y:%.*]], 8 +; CHECK-NEXT: [[Y_1:%.*]] = 
trunc i32 [[Y_321]] to i8 +; CHECK-NEXT: [[Y_32:%.*]] = lshr i32 [[Y]], 16 +; CHECK-NEXT: [[Y_2:%.*]] = trunc i32 [[Y_32]] to i8 +; CHECK-NEXT: [[C_1:%.*]] = icmp ne i8 [[X_1]], [[Y_1]] +; CHECK-NEXT: [[C_2:%.*]] = icmp ne i8 [[X_2]], [[Y_2]] +; CHECK-NEXT: [[C_210:%.*]] = or i1 [[C_2]], [[C_1]] +; CHECK-NEXT: ret i1 [[C_210]] +; + %x.321 = lshr i32 %x, 8 + %x.1 = trunc i32 %x.321 to i8 + %x.32 = lshr i32 %z, 16 + %x.2 = trunc i32 %x.32 to i8 + %y.321 = lshr i32 %y, 8 + %y.1 = trunc i32 %y.321 to i8 + %y.32 = lshr i32 %y, 16 + %y.2 = trunc i32 %y.32 to i8 + %c.1 = icmp ne i8 %x.1, %y.1 + %c.2 = icmp ne i8 %x.2, %y.2 + %c.210 = or i1 %c.2, %c.1 + ret i1 %c.210 +} + +define i1 @ne_21_wrong_op3(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: @ne_21_wrong_op3( +; CHECK-NEXT: [[X_321:%.*]] = lshr i32 [[X:%.*]], 8 +; CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[X_321]] to i8 +; CHECK-NEXT: [[X_32:%.*]] = lshr i32 [[X]], 16 +; CHECK-NEXT: [[X_2:%.*]] = trunc i32 [[X_32]] to i8 +; CHECK-NEXT: [[Y_321:%.*]] = lshr i32 [[Z:%.*]], 8 +; CHECK-NEXT: [[Y_1:%.*]] = trunc i32 [[Y_321]] to i8 +; CHECK-NEXT: [[Y_32:%.*]] = lshr i32 [[Y:%.*]], 16 +; CHECK-NEXT: [[Y_2:%.*]] = trunc i32 [[Y_32]] to i8 +; CHECK-NEXT: [[C_1:%.*]] = icmp ne i8 [[X_1]], [[Y_1]] +; CHECK-NEXT: [[C_2:%.*]] = icmp ne i8 [[X_2]], [[Y_2]] +; CHECK-NEXT: [[C_210:%.*]] = or i1 [[C_2]], [[C_1]] +; CHECK-NEXT: ret i1 [[C_210]] +; + %x.321 = lshr i32 %x, 8 + %x.1 = trunc i32 %x.321 to i8 + %x.32 = lshr i32 %x, 16 + %x.2 = trunc i32 %x.32 to i8 + %y.321 = lshr i32 %z, 8 + %y.1 = trunc i32 %y.321 to i8 + %y.32 = lshr i32 %y, 16 + %y.2 = trunc i32 %y.32 to i8 + %c.1 = icmp ne i8 %x.1, %y.1 + %c.2 = icmp ne i8 %x.2, %y.2 + %c.210 = or i1 %c.2, %c.1 + ret i1 %c.210 +} + +define i1 @ne_21_wrong_op4(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: @ne_21_wrong_op4( +; CHECK-NEXT: [[X_321:%.*]] = lshr i32 [[X:%.*]], 8 +; CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[X_321]] to i8 +; CHECK-NEXT: [[X_32:%.*]] = lshr i32 [[X]], 16 +; CHECK-NEXT: 
[[X_2:%.*]] = trunc i32 [[X_32]] to i8 +; CHECK-NEXT: [[Y_321:%.*]] = lshr i32 [[Y:%.*]], 8 +; CHECK-NEXT: [[Y_1:%.*]] = trunc i32 [[Y_321]] to i8 +; CHECK-NEXT: [[Y_32:%.*]] = lshr i32 [[Z:%.*]], 16 +; CHECK-NEXT: [[Y_2:%.*]] = trunc i32 [[Y_32]] to i8 +; CHECK-NEXT: [[C_1:%.*]] = icmp ne i8 [[X_1]], [[Y_1]] +; CHECK-NEXT: [[C_2:%.*]] = icmp ne i8 [[X_2]], [[Y_2]] +; CHECK-NEXT: [[C_210:%.*]] = or i1 [[C_2]], [[C_1]] +; CHECK-NEXT: ret i1 [[C_210]] +; + %x.321 = lshr i32 %x, 8 + %x.1 = trunc i32 %x.321 to i8 + %x.32 = lshr i32 %x, 16 + %x.2 = trunc i32 %x.32 to i8 + %y.321 = lshr i32 %y, 8 + %y.1 = trunc i32 %y.321 to i8 + %y.32 = lshr i32 %z, 16 + %y.2 = trunc i32 %y.32 to i8 + %c.1 = icmp ne i8 %x.1, %y.1 + %c.2 = icmp ne i8 %x.2, %y.2 + %c.210 = or i1 %c.2, %c.1 + ret i1 %c.210 +} + +define i1 @ne_21_wrong_shift1(i32 %x, i32 %y) { +; CHECK-LABEL: @ne_21_wrong_shift1( +; CHECK-NEXT: [[X_321:%.*]] = lshr i32 [[X:%.*]], 8 +; CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[X_321]] to i8 +; CHECK-NEXT: [[X_32:%.*]] = lshr i32 [[X]], 16 +; CHECK-NEXT: [[X_2:%.*]] = trunc i32 [[X_32]] to i8 +; CHECK-NEXT: [[Y_321:%.*]] = lshr i32 [[Y:%.*]], 7 +; CHECK-NEXT: [[Y_1:%.*]] = trunc i32 [[Y_321]] to i8 +; CHECK-NEXT: [[Y_32:%.*]] = lshr i32 [[Y]], 16 +; CHECK-NEXT: [[Y_2:%.*]] = trunc i32 [[Y_32]] to i8 +; CHECK-NEXT: [[C_1:%.*]] = icmp ne i8 [[X_1]], [[Y_1]] +; CHECK-NEXT: [[C_2:%.*]] = icmp ne i8 [[X_2]], [[Y_2]] +; CHECK-NEXT: [[C_210:%.*]] = or i1 [[C_2]], [[C_1]] +; CHECK-NEXT: ret i1 [[C_210]] +; + %x.321 = lshr i32 %x, 8 + %x.1 = trunc i32 %x.321 to i8 + %x.32 = lshr i32 %x, 16 + %x.2 = trunc i32 %x.32 to i8 + %y.321 = lshr i32 %y, 7 + %y.1 = trunc i32 %y.321 to i8 + %y.32 = lshr i32 %y, 16 + %y.2 = trunc i32 %y.32 to i8 + %c.1 = icmp ne i8 %x.1, %y.1 + %c.2 = icmp ne i8 %x.2, %y.2 + %c.210 = or i1 %c.2, %c.1 + ret i1 %c.210 +} + +define i1 @ne_21_wrong_shift2(i32 %x, i32 %y) { +; CHECK-LABEL: @ne_21_wrong_shift2( +; CHECK-NEXT: [[X_321:%.*]] = lshr i32 [[X:%.*]], 8 +; 
CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[X_321]] to i8 +; CHECK-NEXT: [[X_32:%.*]] = lshr i32 [[X]], 16 +; CHECK-NEXT: [[X_2:%.*]] = trunc i32 [[X_32]] to i8 +; CHECK-NEXT: [[Y_321:%.*]] = lshr i32 [[Y:%.*]], 8 +; CHECK-NEXT: [[Y_1:%.*]] = trunc i32 [[Y_321]] to i8 +; CHECK-NEXT: [[Y_32:%.*]] = lshr i32 [[Y]], 15 +; CHECK-NEXT: [[Y_2:%.*]] = trunc i32 [[Y_32]] to i8 +; CHECK-NEXT: [[C_1:%.*]] = icmp ne i8 [[X_1]], [[Y_1]] +; CHECK-NEXT: [[C_2:%.*]] = icmp ne i8 [[X_2]], [[Y_2]] +; CHECK-NEXT: [[C_210:%.*]] = or i1 [[C_2]], [[C_1]] +; CHECK-NEXT: ret i1 [[C_210]] +; + %x.321 = lshr i32 %x, 8 + %x.1 = trunc i32 %x.321 to i8 + %x.32 = lshr i32 %x, 16 + %x.2 = trunc i32 %x.32 to i8 + %y.321 = lshr i32 %y, 8 + %y.1 = trunc i32 %y.321 to i8 + %y.32 = lshr i32 %y, 15 + %y.2 = trunc i32 %y.32 to i8 + %c.1 = icmp ne i8 %x.1, %y.1 + %c.2 = icmp ne i8 %x.2, %y.2 + %c.210 = or i1 %c.2, %c.1 + ret i1 %c.210 +} + +define i1 @ne_21_not_adjacent(i32 %x, i32 %y) { +; CHECK-LABEL: @ne_21_not_adjacent( +; CHECK-NEXT: [[X_321:%.*]] = lshr i32 [[X:%.*]], 8 +; CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[X_321]] to i8 +; CHECK-NEXT: [[X_32:%.*]] = lshr i32 [[X]], 17 +; CHECK-NEXT: [[X_2:%.*]] = trunc i32 [[X_32]] to i8 +; CHECK-NEXT: [[Y_321:%.*]] = lshr i32 [[Y:%.*]], 8 +; CHECK-NEXT: [[Y_1:%.*]] = trunc i32 [[Y_321]] to i8 +; CHECK-NEXT: [[Y_32:%.*]] = lshr i32 [[Y]], 17 +; CHECK-NEXT: [[Y_2:%.*]] = trunc i32 [[Y_32]] to i8 +; CHECK-NEXT: [[C_1:%.*]] = icmp ne i8 [[X_1]], [[Y_1]] +; CHECK-NEXT: [[C_2:%.*]] = icmp ne i8 [[X_2]], [[Y_2]] +; CHECK-NEXT: [[C_210:%.*]] = or i1 [[C_2]], [[C_1]] +; CHECK-NEXT: ret i1 [[C_210]] +; + %x.321 = lshr i32 %x, 8 + %x.1 = trunc i32 %x.321 to i8 + %x.32 = lshr i32 %x, 17 + %x.2 = trunc i32 %x.32 to i8 + %y.321 = lshr i32 %y, 8 + %y.1 = trunc i32 %y.321 to i8 + %y.32 = lshr i32 %y, 17 + %y.2 = trunc i32 %y.32 to i8 + %c.1 = icmp ne i8 %x.1, %y.1 + %c.2 = icmp ne i8 %x.2, %y.2 + %c.210 = or i1 %c.2, %c.1 + ret i1 %c.210 +} + +define i1 @ne_shift_in_zeros(i32 %x, 
i32 %y) { +; CHECK-LABEL: @ne_shift_in_zeros( +; CHECK-NEXT: [[X_321:%.*]] = lshr i32 [[X:%.*]], 8 +; CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[X_321]] to i8 +; CHECK-NEXT: [[X_32:%.*]] = lshr i32 [[X]], 16 +; CHECK-NEXT: [[X_2:%.*]] = trunc i32 [[X_32]] to i24 +; CHECK-NEXT: [[Y_321:%.*]] = lshr i32 [[Y:%.*]], 8 +; CHECK-NEXT: [[Y_1:%.*]] = trunc i32 [[Y_321]] to i8 +; CHECK-NEXT: [[Y_32:%.*]] = lshr i32 [[Y]], 16 +; CHECK-NEXT: [[Y_2:%.*]] = trunc i32 [[Y_32]] to i24 +; CHECK-NEXT: [[C_1:%.*]] = icmp ne i8 [[X_1]], [[Y_1]] +; CHECK-NEXT: [[C_2:%.*]] = icmp ne i24 [[X_2]], [[Y_2]] +; CHECK-NEXT: [[C_210:%.*]] = or i1 [[C_2]], [[C_1]] +; CHECK-NEXT: ret i1 [[C_210]] +; + %x.321 = lshr i32 %x, 8 + %x.1 = trunc i32 %x.321 to i8 + %x.32 = lshr i32 %x, 16 + %x.2 = trunc i32 %x.32 to i24 + %y.321 = lshr i32 %y, 8 + %y.1 = trunc i32 %y.321 to i8 + %y.32 = lshr i32 %y, 16 + %y.2 = trunc i32 %y.32 to i24 + %c.1 = icmp ne i8 %x.1, %y.1 + %c.2 = icmp ne i24 %x.2, %y.2 + %c.210 = or i1 %c.2, %c.1 + ret i1 %c.210 +} + +define i1 @ne_21_wrong_pred1(i32 %x, i32 %y) { +; CHECK-LABEL: @ne_21_wrong_pred1( +; CHECK-NEXT: [[X_321:%.*]] = lshr i32 [[X:%.*]], 8 +; CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[X_321]] to i8 +; CHECK-NEXT: [[X_32:%.*]] = lshr i32 [[X]], 16 +; CHECK-NEXT: [[X_2:%.*]] = trunc i32 [[X_32]] to i8 +; CHECK-NEXT: [[Y_321:%.*]] = lshr i32 [[Y:%.*]], 8 +; CHECK-NEXT: [[Y_1:%.*]] = trunc i32 [[Y_321]] to i8 +; CHECK-NEXT: [[Y_32:%.*]] = lshr i32 [[Y]], 16 +; CHECK-NEXT: [[Y_2:%.*]] = trunc i32 [[Y_32]] to i8 +; CHECK-NEXT: [[C_1:%.*]] = icmp ne i8 [[X_1]], [[Y_1]] +; CHECK-NEXT: [[C_2:%.*]] = icmp eq i8 [[X_2]], [[Y_2]] +; CHECK-NEXT: [[C_210:%.*]] = or i1 [[C_2]], [[C_1]] +; CHECK-NEXT: ret i1 [[C_210]] +; + %x.321 = lshr i32 %x, 8 + %x.1 = trunc i32 %x.321 to i8 + %x.32 = lshr i32 %x, 16 + %x.2 = trunc i32 %x.32 to i8 + %y.321 = lshr i32 %y, 8 + %y.1 = trunc i32 %y.321 to i8 + %y.32 = lshr i32 %y, 16 + %y.2 = trunc i32 %y.32 to i8 + %c.1 = icmp ne i8 %x.1, %y.1 + %c.2 = 
icmp eq i8 %x.2, %y.2 + %c.210 = or i1 %c.2, %c.1 + ret i1 %c.210 +} + +define i1 @ne_21_wrong_pred2(i32 %x, i32 %y) { +; CHECK-LABEL: @ne_21_wrong_pred2( +; CHECK-NEXT: [[X_321:%.*]] = lshr i32 [[X:%.*]], 8 +; CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[X_321]] to i8 +; CHECK-NEXT: [[X_32:%.*]] = lshr i32 [[X]], 16 +; CHECK-NEXT: [[X_2:%.*]] = trunc i32 [[X_32]] to i8 +; CHECK-NEXT: [[Y_321:%.*]] = lshr i32 [[Y:%.*]], 8 +; CHECK-NEXT: [[Y_1:%.*]] = trunc i32 [[Y_321]] to i8 +; CHECK-NEXT: [[Y_32:%.*]] = lshr i32 [[Y]], 16 +; CHECK-NEXT: [[Y_2:%.*]] = trunc i32 [[Y_32]] to i8 +; CHECK-NEXT: [[C_1:%.*]] = icmp eq i8 [[X_1]], [[Y_1]] +; CHECK-NEXT: [[C_2:%.*]] = icmp eq i8 [[X_2]], [[Y_2]] +; CHECK-NEXT: [[C_210:%.*]] = or i1 [[C_2]], [[C_1]] +; CHECK-NEXT: ret i1 [[C_210]] +; + %x.321 = lshr i32 %x, 8 + %x.1 = trunc i32 %x.321 to i8 + %x.32 = lshr i32 %x, 16 + %x.2 = trunc i32 %x.32 to i8 + %y.321 = lshr i32 %y, 8 + %y.1 = trunc i32 %y.321 to i8 + %y.32 = lshr i32 %y, 16 + %y.2 = trunc i32 %y.32 to i8 + %c.1 = icmp eq i8 %x.1, %y.1 + %c.2 = icmp eq i8 %x.2, %y.2 + %c.210 = or i1 %c.2, %c.1 + ret i1 %c.210 +} diff --git a/llvm/test/Transforms/Reassociate/undef_intrinsics_when_deleting_instructions.ll b/llvm/test/Transforms/Reassociate/undef_intrinsics_when_deleting_instructions.ll --- a/llvm/test/Transforms/Reassociate/undef_intrinsics_when_deleting_instructions.ll +++ b/llvm/test/Transforms/Reassociate/undef_intrinsics_when_deleting_instructions.ll @@ -1,73 +1,95 @@ -; RUN: opt < %s -reassociate -S | FileCheck %s - -; Check that reassociate pass now undefs debug intrinsics that reference a value -; that gets dropped and cannot be salvaged. 
- -; CHECK-NOT: %add = fadd fast float %a, %b -; CHECK: call void @llvm.dbg.value(metadata float undef, metadata [[VAR_X:![0-9]+]], metadata !DIExpression()) - -; CHECK-LABEL: if.then: -; CHECK-NOT: %add1 = fadd fast float %add, %c -; CHECK: call void @llvm.dbg.value(metadata float undef, metadata [[VAR_Y:![0-9]+]], metadata !DIExpression()) -; CHECK-LABEL: !0 = -; CHECK-DAG: [[VAR_Y]] = !DILocalVariable(name: "y" -; CHECK-DAG: [[VAR_X]] = !DILocalVariable(name: "x" - -define float @"?foo@@YAMMMMM@Z"(float %a, float %b, float %c, float %d) !dbg !8 { -entry: - call void @llvm.dbg.value(metadata float %d, metadata !12, metadata !DIExpression()), !dbg !13 - call void @llvm.dbg.value(metadata float %c, metadata !14, metadata !DIExpression()), !dbg !13 - call void @llvm.dbg.value(metadata float %b, metadata !15, metadata !DIExpression()), !dbg !13 - call void @llvm.dbg.value(metadata float %a, metadata !16, metadata !DIExpression()), !dbg !13 - %add = fadd fast float %a, %b, !dbg !17 - call void @llvm.dbg.value(metadata float %add, metadata !18, metadata !DIExpression()), !dbg !13 - %cmp = fcmp fast oeq float %d, 4.000000e+00, !dbg !19 - br i1 %cmp, label %if.then, label %return, !dbg !19 +; RUN: opt < %s -reassociate -S | FileCheck %s -if.then: ; preds = %entry - %add1 = fadd fast float %add, %c, !dbg !20 - call void @llvm.dbg.value(metadata float %add1, metadata !23, metadata !DIExpression()), !dbg !24 - %sub = fsub fast float %add, 1.200000e+01, !dbg !25 - %sub2 = fsub fast float %add1, %sub, !dbg !25 - %mul = fmul fast float %sub2, 2.000000e+01, !dbg !25 - %div = fdiv fast float %mul, 3.000000e+00, !dbg !25 - br label %return, !dbg !25 +; Check that reassociate pass now undefs debug intrinsics that reference a value +; that gets dropped and cannot be salvaged. 
-return: ; preds = %entry, %if.then - %retval.0 = phi float [ %div, %if.then ], [ 0.000000e+00, %entry ], !dbg !13 - ret float %retval.0, !dbg !26 +define hidden i32 @main() local_unnamed_addr { +entry: + %foo = alloca i32, align 4, !dbg !20 + %foo.0.foo.0..sroa_cast = bitcast i32* %foo to i8*, !dbg !20 + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %foo.0.foo.0..sroa_cast), !dbg !20 + store volatile i32 4, i32* %foo, align 4, !dbg !20, !tbaa !21 + %foo.0.foo.0. = load volatile i32, i32* %foo, align 4, !dbg !25, !tbaa !21 + %foo.0.foo.0.15 = load volatile i32, i32* %foo, align 4, !dbg !27, !tbaa !21 + %foo.0.foo.0.16 = load volatile i32, i32* %foo, align 4, !dbg !28, !tbaa !21 + ; CHECK-NOT: %add = add nsw i32 %foo.0.foo.0., %foo.0.foo.0.15 + %add = add nsw i32 %foo.0.foo.0., %foo.0.foo.0.15, !dbg !29 + ; CHECK: call void @llvm.dbg.value(metadata i32 undef, metadata [[VAR_A:![0-9]+]], metadata !DIExpression()) + call void @llvm.dbg.value(metadata i32 %add, metadata !19, metadata !DIExpression()), !dbg !26 + %foo.0.foo.0.17 = load volatile i32, i32* %foo, align 4, !dbg !30, !tbaa !21 + %cmp = icmp eq i32 %foo.0.foo.0.17, 4, !dbg !30 + br i1 %cmp, label %if.then, label %if.end, !dbg !32 + + ; CHECK-LABEL: if.then: +if.then: + ; CHECK-NOT: %add1 = add nsw i32 %add, %foo.0.foo.0.16 + %add1 = add nsw i32 %add, %foo.0.foo.0.16, !dbg !33 + ; CHECK: call void @llvm.dbg.value(metadata i32 undef, metadata [[VAR_A]], metadata !DIExpression()) + call void @llvm.dbg.value(metadata i32 %add1, metadata !19, metadata !DIExpression()), !dbg !26 + ; CHECK: call void @llvm.dbg.value(metadata i32 undef, metadata [[VAR_CHEESE:![0-9]+]], metadata !DIExpression()) + call void @llvm.dbg.value(metadata i32 %add, metadata !18, metadata !DIExpression()), !dbg !26 + %sub = add nsw i32 %add, -12, !dbg !34 + %sub3 = sub nsw i32 %add1, %sub, !dbg !34 + %mul = mul nsw i32 %sub3, 20, !dbg !36 + %div = sdiv i32 %mul, 3, !dbg !37 + br label %if.end, !dbg !38 + +if.end: + %a.0 = phi i32 [ 
%div, %if.then ], [ 0, %entry ], !dbg !39 + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %foo.0.foo.0..sroa_cast), !dbg !40 + ret i32 %a.0, !dbg !41 } -declare void @llvm.dbg.value(metadata, metadata, metadata) +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 +declare void @llvm.dbg.declare(metadata, metadata, metadata) #2 +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 +declare void @llvm.dbg.value(metadata, metadata, metadata) #2 !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3, !4, !5, !6} !llvm.ident = !{!7} -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 11.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) -!1 = !DIFile(filename: "undef_intrinsics_when_deleting_instructions.cpp", directory: "/") +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 10.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, debugInfoForProfiling: true, nameTableKind: None) +!1 = !DIFile(filename: "test.cpp", directory: "F:\") !2 = !{} -!3 = !{i32 2, !"CodeView", i32 1} +!3 = !{i32 2, !"Dwarf Version", i32 4} !4 = !{i32 2, !"Debug Info Version", i32 3} !5 = !{i32 1, !"wchar_size", i32 2} !6 = !{i32 7, !"PIC Level", i32 2} -!7 = !{!"clang version 11.0.0"} -!8 = distinct !DISubprogram(name: "foo", linkageName: "?foo@@YAMMMMM@Z", scope: !1, file: !1, line: 1, type: !9, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) -!9 = !DISubroutineType(types: !10) -!10 = !{!11, !11, !11, !11, !11} -!11 = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float) -!12 = !DILocalVariable(name: "d", arg: 4, scope: !8, file: !1, line: 1, type: !11) -!13 = !DILocation(line: 0, scope: !8) -!14 = !DILocalVariable(name: "c", arg: 3, scope: !8, file: !1, line: 1, type: !11) -!15 = !DILocalVariable(name: "b", arg: 2, scope: !8, file: !1, 
line: 1, type: !11) -!16 = !DILocalVariable(name: "a", arg: 1, scope: !8, file: !1, line: 1, type: !11) -!17 = !DILocation(line: 2, scope: !8) -!18 = !DILocalVariable(name: "x", scope: !8, file: !1, line: 2, type: !11) -!19 = !DILocation(line: 3, scope: !8) -!20 = !DILocation(line: 4, scope: !21) -!21 = distinct !DILexicalBlock(scope: !22, file: !1, line: 3) -!22 = distinct !DILexicalBlock(scope: !8, file: !1, line: 3) -!23 = !DILocalVariable(name: "y", scope: !21, file: !1, line: 4, type: !11) -!24 = !DILocation(line: 0, scope: !21) -!25 = !DILocation(line: 5, scope: !21) -!26 = !DILocation(line: 8, scope: !8) +!7 = !{!"clang version 10.0.0"} +!8 = distinct !DISubprogram(name: "main", scope: !9, file: !9, line: 1, type: !10, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !13) +!9 = !DIFile(filename: "./test.cpp", directory: "F:\") +!10 = !DISubroutineType(types: !11) +!11 = !{!12} +!12 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!13 = !{!14, !16, !17, !18, !19} +!14 = !DILocalVariable(name: "foo", scope: !8, file: !9, line: 2, type: !15) +!15 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !12) +!16 = !DILocalVariable(name: "read1", scope: !8, file: !9, line: 3, type: !12) +!17 = !DILocalVariable(name: "read2", scope: !8, file: !9, line: 4, type: !12) +; CHECK: [[VAR_CHEESE]] = !DILocalVariable(name: "cheese" +!18 = !DILocalVariable(name: "cheese", scope: !8, file: !9, line: 6, type: !12) +; CHECK: [[VAR_A]] = !DILocalVariable(name: "a" +!19 = !DILocalVariable(name: "a", scope: !8, file: !9, line: 7, type: !12) +!20 = !DILocation(line: 2, scope: !8) +!21 = !{!22, !22, i64 0} +!22 = !{!"int", !23, i64 0} +!23 = !{!"omnipotent char", !24, i64 0} +!24 = !{!"Simple C++ TBAA"} +!25 = !DILocation(line: 3, scope: !8) +!26 = !DILocation(line: 0, scope: !8) +!27 = !DILocation(line: 4, scope: !8) +!28 = !DILocation(line: 6, scope: !8) +!29 = !DILocation(line: 7, scope: !8) +!30 = 
!DILocation(line: 10, scope: !31) +!31 = distinct !DILexicalBlock(scope: !8, file: !9, line: 10) +!32 = !DILocation(line: 10, scope: !8) +!33 = !DILocation(line: 8, scope: !8) +!34 = !DILocation(line: 12, scope: !35) +!35 = distinct !DILexicalBlock(scope: !31, file: !9, line: 10) +!36 = !DILocation(line: 13, scope: !35) +!37 = !DILocation(line: 14, scope: !35) +!38 = !DILocation(line: 15, scope: !35) +!39 = !DILocation(line: 0, scope: !31) +!40 = !DILocation(line: 20, scope: !8) +!41 = !DILocation(line: 19, scope: !8) diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-2.s b/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-2.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-2.s @@ -0,0 +1,48 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -iterations=1 -resource-pressure=false -timeline < %s | FileCheck %s + +imul %rax, %rbx +lzcnt %ax, %bx +add %ecx, %ebx + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 3 +# CHECK-NEXT: Total Cycles: 8 +# CHECK-NEXT: Total uOps: 3 + +# CHECK: Dispatch Width: 6 +# CHECK-NEXT: uOps Per Cycle: 0.38 +# CHECK-NEXT: IPC: 0.38 +# CHECK-NEXT: Block RThroughput: 1.3 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 1.00 imulq %rax, %rbx +# CHECK-NEXT: 1 1 1.00 lzcntw %ax, %bx +# CHECK-NEXT: 1 1 0.25 addl %ecx, %ebx + +# CHECK: Timeline view: +# CHECK-NEXT: Index 01234567 + +# CHECK: [0,0] DeeeER . imulq %rax, %rbx +# CHECK-NEXT: [0,1] D===eER. 
lzcntw %ax, %bx +# CHECK-NEXT: [0,2] D====eER addl %ecx, %ebx + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 imulq %rax, %rbx +# CHECK-NEXT: 1. 1 4.0 0.0 0.0 lzcntw %ax, %bx +# CHECK-NEXT: 2. 1 5.0 0.0 0.0 addl %ecx, %ebx +# CHECK-NEXT: 1 3.3 0.3 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-3.s b/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-3.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-3.s @@ -0,0 +1,102 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -iterations=1500 -timeline -timeline-max-iterations=6 < %s | FileCheck %s + +# The ILP is limited by the false dependency on %dx. So, the mov cannot execute +# in parallel with the add. 
+ +add %cx, %dx +mov %ax, %dx +xor %bx, %dx + +# CHECK: Iterations: 1500 +# CHECK-NEXT: Instructions: 4500 +# CHECK-NEXT: Total Cycles: 4503 +# CHECK-NEXT: Total uOps: 4500 + +# CHECK: Dispatch Width: 6 +# CHECK-NEXT: uOps Per Cycle: 1.00 +# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: Block RThroughput: 1.5 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 0.25 addw %cx, %dx +# CHECK-NEXT: 1 1 1.00 movw %ax, %dx +# CHECK-NEXT: 1 1 0.25 xorw %bx, %dx + +# CHECK: Resources: +# CHECK-NEXT: [0] - Zn3AGU0 +# CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: [8] - Zn3FPP0 +# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: [14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# CHECK-NEXT: [15.0] - Zn3Load +# CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: [16.0] - Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: - - - 1.50 1.50 1.50 1.50 - - - - - - - - - - - - - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - addw %cx, %dx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - 
- - - - - - - - - - movw %ax, %dx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - xorw %bx, %dx + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 0 + +# CHECK: [0,0] DeER . . . . addw %cx, %dx +# CHECK-NEXT: [0,1] D=eER. . . . movw %ax, %dx +# CHECK-NEXT: [0,2] D==eER . . . xorw %bx, %dx +# CHECK-NEXT: [1,0] D===eER . . . addw %cx, %dx +# CHECK-NEXT: [1,1] D====eER . . . movw %ax, %dx +# CHECK-NEXT: [1,2] D=====eER . . . xorw %bx, %dx +# CHECK-NEXT: [2,0] .D=====eER. . . addw %cx, %dx +# CHECK-NEXT: [2,1] .D======eER . . movw %ax, %dx +# CHECK-NEXT: [2,2] .D=======eER . . xorw %bx, %dx +# CHECK-NEXT: [3,0] .D========eER . . addw %cx, %dx +# CHECK-NEXT: [3,1] .D=========eER . . movw %ax, %dx +# CHECK-NEXT: [3,2] .D==========eER. . xorw %bx, %dx +# CHECK-NEXT: [4,0] . D==========eER . addw %cx, %dx +# CHECK-NEXT: [4,1] . D===========eER . movw %ax, %dx +# CHECK-NEXT: [4,2] . D============eER . xorw %bx, %dx +# CHECK-NEXT: [5,0] . D=============eER . addw %cx, %dx +# CHECK-NEXT: [5,1] . D==============eER. movw %ax, %dx +# CHECK-NEXT: [5,2] . D===============eER xorw %bx, %dx + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 6 7.5 0.2 0.0 addw %cx, %dx +# CHECK-NEXT: 1. 6 8.5 0.0 0.0 movw %ax, %dx +# CHECK-NEXT: 2. 
6 9.5 0.0 0.0 xorw %bx, %dx +# CHECK-NEXT: 6 8.5 0.1 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-4.s b/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-4.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-4.s @@ -0,0 +1,105 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -iterations=1500 -timeline -timeline-max-iterations=7 < %s | FileCheck %s + +# The lzcnt cannot execute in parallel with the imul because there is a false +# dependency on %bx. + +imul %ax, %bx +lzcnt %ax, %bx +add %cx, %bx + +# CHECK: Iterations: 1500 +# CHECK-NEXT: Instructions: 4500 +# CHECK-NEXT: Total Cycles: 7503 +# CHECK-NEXT: Total uOps: 4500 + +# CHECK: Dispatch Width: 6 +# CHECK-NEXT: uOps Per Cycle: 0.60 +# CHECK-NEXT: IPC: 0.60 +# CHECK-NEXT: Block RThroughput: 1.3 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 1.00 imulw %ax, %bx +# CHECK-NEXT: 1 1 1.00 lzcntw %ax, %bx +# CHECK-NEXT: 1 1 0.25 addw %cx, %bx + +# CHECK: Resources: +# CHECK-NEXT: [0] - Zn3AGU0 +# CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: [8] - Zn3FPP0 +# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: [14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# CHECK-NEXT: [15.0] - Zn3Load +# CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: [16.0] - 
Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: - - - 1.67 1.00 1.67 1.67 - - - - - - - - - - - - - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - - - - - - - imulw %ax, %bx +# CHECK-NEXT: - - - 1.33 - 1.33 1.33 - - - - - - - - - - - - - - - - lzcntw %ax, %bx +# CHECK-NEXT: - - - 0.33 - 0.33 0.33 - - - - - - - - - - - - - - - - addw %cx, %bx + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 01234567 +# CHECK-NEXT: Index 0123456789 0123456789 + +# CHECK: [0,0] DeeeER . . . . . . . imulw %ax, %bx +# CHECK-NEXT: [0,1] D===eER . . . . . . . lzcntw %ax, %bx +# CHECK-NEXT: [0,2] D====eER . . . . . . . addw %cx, %bx +# CHECK-NEXT: [1,0] D=====eeeER . . . . . . imulw %ax, %bx +# CHECK-NEXT: [1,1] D========eER . . . . . . lzcntw %ax, %bx +# CHECK-NEXT: [1,2] D=========eER . . . . . . addw %cx, %bx +# CHECK-NEXT: [2,0] .D=========eeeER . . . . . imulw %ax, %bx +# CHECK-NEXT: [2,1] .D============eER . . . . . lzcntw %ax, %bx +# CHECK-NEXT: [2,2] .D=============eER . . . . . addw %cx, %bx +# CHECK-NEXT: [3,0] .D==============eeeER . . . . imulw %ax, %bx +# CHECK-NEXT: [3,1] .D=================eER . . . . lzcntw %ax, %bx +# CHECK-NEXT: [3,2] .D==================eER . . . . addw %cx, %bx +# CHECK-NEXT: [4,0] . D==================eeeER . . . imulw %ax, %bx +# CHECK-NEXT: [4,1] . D=====================eER . . . lzcntw %ax, %bx +# CHECK-NEXT: [4,2] . D======================eER . . . addw %cx, %bx +# CHECK-NEXT: [5,0] . D=======================eeeER . . imulw %ax, %bx +# CHECK-NEXT: [5,1] . D==========================eER . . lzcntw %ax, %bx +# CHECK-NEXT: [5,2] . 
D===========================eER . . addw %cx, %bx +# CHECK-NEXT: [6,0] . D===========================eeeER . imulw %ax, %bx +# CHECK-NEXT: [6,1] . D==============================eER. lzcntw %ax, %bx +# CHECK-NEXT: [6,2] . D===============================eER addw %cx, %bx + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 7 14.7 0.1 0.0 imulw %ax, %bx +# CHECK-NEXT: 1. 7 17.7 0.0 0.0 lzcntw %ax, %bx +# CHECK-NEXT: 2. 7 18.7 0.0 0.0 addw %cx, %bx +# CHECK-NEXT: 7 17.0 0.0 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-5.s b/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-5.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-5.s @@ -0,0 +1,80 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -iterations=1500 -timeline -timeline-max-iterations=8 < %s | FileCheck %s + +lzcnt %ax, %bx ## partial register stall. 
+ +# CHECK: Iterations: 1500 +# CHECK-NEXT: Instructions: 1500 +# CHECK-NEXT: Total Cycles: 1503 +# CHECK-NEXT: Total uOps: 1500 + +# CHECK: Dispatch Width: 6 +# CHECK-NEXT: uOps Per Cycle: 1.00 +# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: Block RThroughput: 1.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 1.00 lzcntw %ax, %bx + +# CHECK: Resources: +# CHECK-NEXT: [0] - Zn3AGU0 +# CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: [8] - Zn3FPP0 +# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: [14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# CHECK-NEXT: [15.0] - Zn3Load +# CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: [16.0] - Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - lzcntw %ax, %bx + +# CHECK: Timeline view: +# CHECK-NEXT: 0 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeER . . lzcntw %ax, %bx +# CHECK-NEXT: [1,0] D=eER. . 
lzcntw %ax, %bx +# CHECK-NEXT: [2,0] D==eER . lzcntw %ax, %bx +# CHECK-NEXT: [3,0] D===eER . lzcntw %ax, %bx +# CHECK-NEXT: [4,0] D====eER . lzcntw %ax, %bx +# CHECK-NEXT: [5,0] D=====eER . lzcntw %ax, %bx +# CHECK-NEXT: [6,0] .D=====eER. lzcntw %ax, %bx +# CHECK-NEXT: [7,0] .D======eER lzcntw %ax, %bx + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 8 4.3 0.1 0.0 lzcntw %ax, %bx diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-6.s b/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-6.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-6.s @@ -0,0 +1,98 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -iterations=1500 -timeline -timeline-max-iterations=4 < %s | FileCheck %s + +# Each lzcnt has a false dependency on %ecx; the first lzcnt has to wait on the +# imul. However, the folded load can start immediately. +# The last lzcnt has a false dependency on %cx. However, even in this case, the +# folded load can start immediately. 
+ +imul %edx, %ecx +lzcnt (%rsp), %cx +lzcnt 2(%rsp), %cx + +# CHECK: Iterations: 1500 +# CHECK-NEXT: Instructions: 4500 +# CHECK-NEXT: Total Cycles: 9003 +# CHECK-NEXT: Total uOps: 4500 + +# CHECK: Dispatch Width: 6 +# CHECK-NEXT: uOps Per Cycle: 0.50 +# CHECK-NEXT: IPC: 0.50 +# CHECK-NEXT: Block RThroughput: 1.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 1.00 imull %edx, %ecx +# CHECK-NEXT: 1 5 0.33 * lzcntw (%rsp), %cx +# CHECK-NEXT: 1 5 0.33 * lzcntw 2(%rsp), %cx + +# CHECK: Resources: +# CHECK-NEXT: [0] - Zn3AGU0 +# CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: [8] - Zn3FPP0 +# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: [14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# CHECK-NEXT: [15.0] - Zn3Load +# CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: [16.0] - Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: 0.67 0.67 0.67 0.67 1.00 0.67 0.67 - - - - - - - - 0.67 0.67 0.67 0.67 0.67 0.67 - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - - - - - - - imull %edx, %ecx 
+# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 0.33 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - lzcntw (%rsp), %cx +# CHECK-NEXT: 0.33 0.33 0.33 0.33 - 0.33 0.33 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - lzcntw 2(%rsp), %cx + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 0123456 + +# CHECK: [0,0] DeeeER . . . .. imull %edx, %ecx +# CHECK-NEXT: [0,1] DeeeeeER . . . .. lzcntw (%rsp), %cx +# CHECK-NEXT: [0,2] D=eeeeeER . . . .. lzcntw 2(%rsp), %cx +# CHECK-NEXT: [1,0] D======eeeER . . .. imull %edx, %ecx +# CHECK-NEXT: [1,1] D======eeeeeER . . .. lzcntw (%rsp), %cx +# CHECK-NEXT: [1,2] D=======eeeeeER. . .. lzcntw 2(%rsp), %cx +# CHECK-NEXT: [2,0] .D===========eeeER . .. imull %edx, %ecx +# CHECK-NEXT: [2,1] .D===========eeeeeER. .. lzcntw (%rsp), %cx +# CHECK-NEXT: [2,2] .D============eeeeeER .. lzcntw 2(%rsp), %cx +# CHECK-NEXT: [3,0] .D=================eeeER .. imull %edx, %ecx +# CHECK-NEXT: [3,1] .D=================eeeeeER. lzcntw (%rsp), %cx +# CHECK-NEXT: [3,2] .D==================eeeeeER lzcntw 2(%rsp), %cx + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 4 9.5 0.3 0.0 imull %edx, %ecx +# CHECK-NEXT: 1. 4 9.5 0.0 0.0 lzcntw (%rsp), %cx +# CHECK-NEXT: 2. 
4 10.5 0.0 0.0 lzcntw 2(%rsp), %cx +# CHECK-NEXT: 4 9.8 0.1 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-7.s b/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-7.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-7.s @@ -0,0 +1,52 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -iterations=1 -resource-pressure=false -timeline < %s | FileCheck %s + +# An instruction that writes to a 32-bit register will not have any false +# dependence on the corresponding 64-bit register because the upper part of +# the 64-bit register is set to zero + +imulq %rax, %rcx +addl %edx, %ecx +addq %rcx, %rdx + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 3 +# CHECK-NEXT: Total Cycles: 8 +# CHECK-NEXT: Total uOps: 3 + +# CHECK: Dispatch Width: 6 +# CHECK-NEXT: uOps Per Cycle: 0.38 +# CHECK-NEXT: IPC: 0.38 +# CHECK-NEXT: Block RThroughput: 1.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 1.00 imulq %rax, %rcx +# CHECK-NEXT: 1 1 0.25 addl %edx, %ecx +# CHECK-NEXT: 1 1 0.25 addq %rcx, %rdx + +# CHECK: Timeline view: +# CHECK-NEXT: Index 01234567 + +# CHECK: [0,0] DeeeER . imulq %rax, %rcx +# CHECK-NEXT: [0,1] D===eER. addl %edx, %ecx +# CHECK-NEXT: [0,2] D====eER addq %rcx, %rdx + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 imulq %rax, %rcx +# CHECK-NEXT: 1. 
1 4.0 0.0 0.0 addl %edx, %ecx +# CHECK-NEXT: 2. 1 5.0 0.0 0.0 addq %rcx, %rdx +# CHECK-NEXT: 1 3.3 0.3 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update.s b/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update.s @@ -0,0 +1,48 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -iterations=1 -resource-pressure=false -timeline < %s | FileCheck %s + +imul %ax, %cx +add %al, %cl +add %ecx, %ebx + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 3 +# CHECK-NEXT: Total Cycles: 8 +# CHECK-NEXT: Total uOps: 3 + +# CHECK: Dispatch Width: 6 +# CHECK-NEXT: uOps Per Cycle: 0.38 +# CHECK-NEXT: IPC: 0.38 +# CHECK-NEXT: Block RThroughput: 1.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 1.00 imulw %ax, %cx +# CHECK-NEXT: 1 1 0.25 addb %al, %cl +# CHECK-NEXT: 1 1 0.25 addl %ecx, %ebx + +# CHECK: Timeline view: +# CHECK-NEXT: Index 01234567 + +# CHECK: [0,0] DeeeER . imulw %ax, %cx +# CHECK-NEXT: [0,1] D===eER. addb %al, %cl +# CHECK-NEXT: [0,2] D====eER addl %ecx, %ebx + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 imulw %ax, %cx +# CHECK-NEXT: 1. 1 4.0 0.0 0.0 addb %al, %cl +# CHECK-NEXT: 2. 
1 5.0 0.0 0.0 addl %ecx, %ebx +# CHECK-NEXT: 1 3.3 0.3 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-adx.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-adx.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-adx.s @@ -0,0 +1,70 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -instruction-tables < %s | FileCheck %s + +adcx %ebx, %ecx +adcx (%rbx), %ecx +adcx %rbx, %rcx +adcx (%rbx), %rcx + +adox %ebx, %ecx +adox (%rbx), %ecx +adox %rbx, %rcx +adox (%rbx), %rcx + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 1.00 adcxl %ebx, %ecx +# CHECK-NEXT: 1 5 1.00 * adcxl (%rbx), %ecx +# CHECK-NEXT: 1 1 1.00 adcxq %rbx, %rcx +# CHECK-NEXT: 1 5 1.00 * adcxq (%rbx), %rcx +# CHECK-NEXT: 1 1 1.00 adoxl %ebx, %ecx +# CHECK-NEXT: 1 5 1.00 * adoxl (%rbx), %ecx +# CHECK-NEXT: 1 1 1.00 adoxq %rbx, %rcx +# CHECK-NEXT: 1 5 1.00 * adoxq (%rbx), %rcx + +# CHECK: Resources: +# CHECK-NEXT: [0] - Zn3AGU0 +# CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: [8] - Zn3FPP0 +# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: [14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# CHECK-NEXT: [15.0] - Zn3Load +# CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: [16.0] - Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# 
CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: 1.33 1.33 1.33 8.00 8.00 8.00 8.00 - - - - - - - - 1.33 1.33 1.33 1.33 1.33 1.33 - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - adcxl %ebx, %ecx +# CHECK-NEXT: 0.33 0.33 0.33 1.00 1.00 1.00 1.00 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - adcxl (%rbx), %ecx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - adcxq %rbx, %rcx +# CHECK-NEXT: 0.33 0.33 0.33 1.00 1.00 1.00 1.00 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - adcxq (%rbx), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - adoxl %ebx, %ecx +# CHECK-NEXT: 0.33 0.33 0.33 1.00 1.00 1.00 1.00 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - adoxl (%rbx), %ecx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - adoxq %rbx, %rcx +# CHECK-NEXT: 0.33 0.33 0.33 1.00 1.00 1.00 1.00 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - adoxq (%rbx), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-aes.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-aes.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-aes.s @@ -0,0 +1,86 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -instruction-tables < %s | FileCheck %s + +aesdec %xmm0, %xmm2 +aesdec (%rax), %xmm2 + +aesdeclast %xmm0, %xmm2 +aesdeclast (%rax), %xmm2 + +aesenc %xmm0, %xmm2 +aesenc (%rax), %xmm2 + +aesenclast %xmm0, %xmm2 +aesenclast (%rax), %xmm2 + +aesimc %xmm0, %xmm2 +aesimc (%rax), %xmm2 + +aeskeygenassist $22, %xmm0, %xmm2 +aeskeygenassist $22, (%rax), %xmm2 + 
+# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 4 0.50 aesdec %xmm0, %xmm2 +# CHECK-NEXT: 1 11 0.50 * aesdec (%rax), %xmm2 +# CHECK-NEXT: 1 4 0.50 aesdeclast %xmm0, %xmm2 +# CHECK-NEXT: 1 11 0.50 * aesdeclast (%rax), %xmm2 +# CHECK-NEXT: 1 4 0.50 aesenc %xmm0, %xmm2 +# CHECK-NEXT: 1 11 0.50 * aesenc (%rax), %xmm2 +# CHECK-NEXT: 1 4 0.50 aesenclast %xmm0, %xmm2 +# CHECK-NEXT: 1 11 0.50 * aesenclast (%rax), %xmm2 +# CHECK-NEXT: 1 4 0.50 aesimc %xmm0, %xmm2 +# CHECK-NEXT: 1 11 0.50 * aesimc (%rax), %xmm2 +# CHECK-NEXT: 1 4 0.50 aeskeygenassist $22, %xmm0, %xmm2 +# CHECK-NEXT: 1 11 0.50 * aeskeygenassist $22, (%rax), %xmm2 + +# CHECK: Resources: +# CHECK-NEXT: [0] - Zn3AGU0 +# CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: [8] - Zn3FPP0 +# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: [14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# CHECK-NEXT: [15.0] - Zn3Load +# CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: [16.0] - Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: - - - - - - - - 6.00 6.00 - - 3.00 3.00 - 2.00 2.00 2.00 2.00 2.00 2.00 - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] 
[14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - aesdec %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - aesdec (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - aesdeclast %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - aesdeclast (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - aesenc %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - aesenc (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - aesenclast %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - aesenclast (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - aesimc %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - aesimc (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - aeskeygenassist $22, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - aeskeygenassist $22, (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx1.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx1.s @@ -0,0 +1,2446 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -instruction-tables < %s | FileCheck %s + +vaddpd %xmm0, %xmm1, %xmm2 +vaddpd (%rax), %xmm1, %xmm2 + +vaddpd %ymm0, %ymm1, %ymm2 +vaddpd (%rax), %ymm1, %ymm2 + +vaddps %xmm0, %xmm1, %xmm2 +vaddps (%rax), %xmm1, %xmm2 + +vaddps %ymm0, %ymm1, %ymm2 +vaddps (%rax), %ymm1, %ymm2 + +vaddsd %xmm0, %xmm1, %xmm2 +vaddsd (%rax), %xmm1, 
%xmm2 + +vaddss %xmm0, %xmm1, %xmm2 +vaddss (%rax), %xmm1, %xmm2 + +vaddsubpd %xmm0, %xmm1, %xmm2 +vaddsubpd (%rax), %xmm1, %xmm2 + +vaddsubpd %ymm0, %ymm1, %ymm2 +vaddsubpd (%rax), %ymm1, %ymm2 + +vaddsubps %xmm0, %xmm1, %xmm2 +vaddsubps (%rax), %xmm1, %xmm2 + +vaddsubps %ymm0, %ymm1, %ymm2 +vaddsubps (%rax), %ymm1, %ymm2 + +vaesdec %xmm0, %xmm1, %xmm2 +vaesdec (%rax), %xmm1, %xmm2 + +vaesdeclast %xmm0, %xmm1, %xmm2 +vaesdeclast (%rax), %xmm1, %xmm2 + +vaesenc %xmm0, %xmm1, %xmm2 +vaesenc (%rax), %xmm1, %xmm2 + +vaesenclast %xmm0, %xmm1, %xmm2 +vaesenclast (%rax), %xmm1, %xmm2 + +vaesimc %xmm0, %xmm2 +vaesimc (%rax), %xmm2 + +vaeskeygenassist $22, %xmm0, %xmm2 +vaeskeygenassist $22, (%rax), %xmm2 + +vandnpd %xmm0, %xmm1, %xmm2 +vandnpd (%rax), %xmm1, %xmm2 + +vandnpd %ymm0, %ymm1, %ymm2 +vandnpd (%rax), %ymm1, %ymm2 + +vandnps %xmm0, %xmm1, %xmm2 +vandnps (%rax), %xmm1, %xmm2 + +vandnps %ymm0, %ymm1, %ymm2 +vandnps (%rax), %ymm1, %ymm2 + +vandpd %xmm0, %xmm1, %xmm2 +vandpd (%rax), %xmm1, %xmm2 + +vandpd %ymm0, %ymm1, %ymm2 +vandpd (%rax), %ymm1, %ymm2 + +vandps %xmm0, %xmm1, %xmm2 +vandps (%rax), %xmm1, %xmm2 + +vandps %ymm0, %ymm1, %ymm2 +vandps (%rax), %ymm1, %ymm2 + +vblendpd $11, %xmm0, %xmm1, %xmm2 +vblendpd $11, (%rax), %xmm1, %xmm2 + +vblendpd $11, %ymm0, %ymm1, %ymm2 +vblendpd $11, (%rax), %ymm1, %ymm2 + +vblendps $11, %xmm0, %xmm1, %xmm2 +vblendps $11, (%rax), %xmm1, %xmm2 + +vblendps $11, %ymm0, %ymm1, %ymm2 +vblendps $11, (%rax), %ymm1, %ymm2 + +vblendvpd %xmm3, %xmm0, %xmm1, %xmm2 +vblendvpd %xmm3, (%rax), %xmm1, %xmm2 + +vblendvpd %ymm3, %ymm0, %ymm1, %ymm2 +vblendvpd %ymm3, (%rax), %ymm1, %ymm2 + +vblendvps %xmm3, %xmm0, %xmm1, %xmm2 +vblendvps %xmm3, (%rax), %xmm1, %xmm2 + +vblendvps %ymm3, %ymm0, %ymm1, %ymm2 +vblendvps %ymm3, (%rax), %ymm1, %ymm2 + +vbroadcastf128 (%rax), %ymm2 + +vbroadcastsd (%rax), %ymm2 + +vbroadcastss (%rax), %xmm2 +vbroadcastss (%rax), %ymm2 + +vcmppd $0, %xmm0, %xmm1, %xmm2 +vcmppd $0, (%rax), %xmm1, %xmm2 + +vcmppd $0, 
%ymm0, %ymm1, %ymm2 +vcmppd $0, (%rax), %ymm1, %ymm2 + +vcmpps $0, %xmm0, %xmm1, %xmm2 +vcmpps $0, (%rax), %xmm1, %xmm2 + +vcmpps $0, %ymm0, %ymm1, %ymm2 +vcmpps $0, (%rax), %ymm1, %ymm2 + +vcmpsd $0, %xmm0, %xmm1, %xmm2 +vcmpsd $0, (%rax), %xmm1, %xmm2 + +vcmpss $0, %xmm0, %xmm1, %xmm2 +vcmpss $0, (%rax), %xmm1, %xmm2 + +vcomisd %xmm0, %xmm1 +vcomisd (%rax), %xmm1 + +vcomiss %xmm0, %xmm1 +vcomiss (%rax), %xmm1 + +vcvtdq2pd %xmm0, %xmm2 +vcvtdq2pd (%rax), %xmm2 + +vcvtdq2pd %xmm0, %ymm2 +vcvtdq2pd (%rax), %ymm2 + +vcvtdq2ps %xmm0, %xmm2 +vcvtdq2ps (%rax), %xmm2 + +vcvtdq2ps %ymm0, %ymm2 +vcvtdq2ps (%rax), %ymm2 + +vcvtpd2dqx %xmm0, %xmm2 +vcvtpd2dqx (%rax), %xmm2 + +vcvtpd2dqy %ymm0, %xmm2 +vcvtpd2dqy (%rax), %xmm2 + +vcvtpd2psx %xmm0, %xmm2 +vcvtpd2psx (%rax), %xmm2 + +vcvtpd2psy %ymm0, %xmm2 +vcvtpd2psy (%rax), %xmm2 + +vcvtps2dq %xmm0, %xmm2 +vcvtps2dq (%rax), %xmm2 + +vcvtps2dq %ymm0, %ymm2 +vcvtps2dq (%rax), %ymm2 + +vcvtps2pd %xmm0, %xmm2 +vcvtps2pd (%rax), %xmm2 + +vcvtps2pd %xmm0, %ymm2 +vcvtps2pd (%rax), %ymm2 + +vcvtsd2si %xmm0, %ecx +vcvtsd2si %xmm0, %rcx +vcvtsd2si (%rax), %ecx +vcvtsd2si (%rax), %rcx + +vcvtsd2ss %xmm0, %xmm1, %xmm2 +vcvtsd2ss (%rax), %xmm1, %xmm2 + +vcvtsi2sdl %ecx, %xmm0, %xmm2 +vcvtsi2sdq %rcx, %xmm0, %xmm2 +vcvtsi2sdl (%rax), %xmm0, %xmm2 +vcvtsi2sdq (%rax), %xmm0, %xmm2 + +vcvtsi2ssl %ecx, %xmm0, %xmm2 +vcvtsi2ssq %rcx, %xmm0, %xmm2 +vcvtsi2ssl (%rax), %xmm0, %xmm2 +vcvtsi2ssq (%rax), %xmm0, %xmm2 + +vcvtss2sd %xmm0, %xmm1, %xmm2 +vcvtss2sd (%rax), %xmm1, %xmm2 + +vcvtss2si %xmm0, %ecx +vcvtss2si %xmm0, %rcx +vcvtss2si (%rax), %ecx +vcvtss2si (%rax), %rcx + +vcvttpd2dqx %xmm0, %xmm2 +vcvttpd2dqx (%rax), %xmm2 + +vcvttpd2dqy %ymm0, %xmm2 +vcvttpd2dqy (%rax), %xmm2 + +vcvttps2dq %xmm0, %xmm2 +vcvttps2dq (%rax), %xmm2 + +vcvttps2dq %ymm0, %ymm2 +vcvttps2dq (%rax), %ymm2 + +vcvttsd2si %xmm0, %ecx +vcvttsd2si %xmm0, %rcx +vcvttsd2si (%rax), %ecx +vcvttsd2si (%rax), %rcx + +vcvttss2si %xmm0, %ecx +vcvttss2si %xmm0, %rcx +vcvttss2si 
(%rax), %ecx +vcvttss2si (%rax), %rcx + +vdivpd %xmm0, %xmm1, %xmm2 +vdivpd (%rax), %xmm1, %xmm2 + +vdivpd %ymm0, %ymm1, %ymm2 +vdivpd (%rax), %ymm1, %ymm2 + +vdivps %xmm0, %xmm1, %xmm2 +vdivps (%rax), %xmm1, %xmm2 + +vdivps %ymm0, %ymm1, %ymm2 +vdivps (%rax), %ymm1, %ymm2 + +vdivsd %xmm0, %xmm1, %xmm2 +vdivsd (%rax), %xmm1, %xmm2 + +vdivss %xmm0, %xmm1, %xmm2 +vdivss (%rax), %xmm1, %xmm2 + +vdppd $22, %xmm0, %xmm1, %xmm2 +vdppd $22, (%rax), %xmm1, %xmm2 + +vdpps $22, %xmm0, %xmm1, %xmm2 +vdpps $22, (%rax), %xmm1, %xmm2 + +vdpps $22, %ymm0, %ymm1, %ymm2 +vdpps $22, (%rax), %ymm1, %ymm2 + +vextractf128 $1, %ymm0, %xmm2 +vextractf128 $1, %ymm0, (%rax) + +vextractps $1, %xmm0, %rcx +vextractps $1, %xmm0, (%rax) + +vhaddpd %xmm0, %xmm1, %xmm2 +vhaddpd (%rax), %xmm1, %xmm2 + +vhaddpd %ymm0, %ymm1, %ymm2 +vhaddpd (%rax), %ymm1, %ymm2 + +vhaddps %xmm0, %xmm1, %xmm2 +vhaddps (%rax), %xmm1, %xmm2 + +vhaddps %ymm0, %ymm1, %ymm2 +vhaddps (%rax), %ymm1, %ymm2 + +vhsubpd %xmm0, %xmm1, %xmm2 +vhsubpd (%rax), %xmm1, %xmm2 + +vhsubpd %ymm0, %ymm1, %ymm2 +vhsubpd (%rax), %ymm1, %ymm2 + +vhsubps %xmm0, %xmm1, %xmm2 +vhsubps (%rax), %xmm1, %xmm2 + +vhsubps %ymm0, %ymm1, %ymm2 +vhsubps (%rax), %ymm1, %ymm2 + +vinsertf128 $1, %xmm0, %ymm1, %ymm2 +vinsertf128 $1, (%rax), %ymm1, %ymm2 + +vinsertps $1, %xmm0, %xmm1, %xmm2 +vinsertps $1, (%rax), %xmm1, %xmm2 + +vlddqu (%rax), %xmm2 +vlddqu (%rax), %ymm2 + +vldmxcsr (%rax) + +vmaskmovdqu %xmm0, %xmm1 + +vmaskmovpd (%rax), %xmm0, %xmm2 +vmaskmovpd (%rax), %ymm0, %ymm2 + +vmaskmovpd %xmm0, %xmm1, (%rax) +vmaskmovpd %ymm0, %ymm1, (%rax) + +vmaskmovps (%rax), %xmm0, %xmm2 +vmaskmovps (%rax), %ymm0, %ymm2 + +vmaskmovps %xmm0, %xmm1, (%rax) +vmaskmovps %ymm0, %ymm1, (%rax) + +vmaxpd %xmm0, %xmm1, %xmm2 +vmaxpd (%rax), %xmm1, %xmm2 + +vmaxpd %ymm0, %ymm1, %ymm2 +vmaxpd (%rax), %ymm1, %ymm2 + +vmaxps %xmm0, %xmm1, %xmm2 +vmaxps (%rax), %xmm1, %xmm2 + +vmaxps %ymm0, %ymm1, %ymm2 +vmaxps (%rax), %ymm1, %ymm2 + +vmaxsd %xmm0, %xmm1, %xmm2 +vmaxsd 
(%rax), %xmm1, %xmm2 + +vmaxss %xmm0, %xmm1, %xmm2 +vmaxss (%rax), %xmm1, %xmm2 + +vminpd %xmm0, %xmm1, %xmm2 +vminpd (%rax), %xmm1, %xmm2 + +vminpd %ymm0, %ymm1, %ymm2 +vminpd (%rax), %ymm1, %ymm2 + +vminps %xmm0, %xmm1, %xmm2 +vminps (%rax), %xmm1, %xmm2 + +vminps %ymm0, %ymm1, %ymm2 +vminps (%rax), %ymm1, %ymm2 + +vminsd %xmm0, %xmm1, %xmm2 +vminsd (%rax), %xmm1, %xmm2 + +vminss %xmm0, %xmm1, %xmm2 +vminss (%rax), %xmm1, %xmm2 + +vmovapd %xmm0, %xmm2 +vmovapd %xmm0, (%rax) +vmovapd (%rax), %xmm2 + +vmovapd %ymm0, %ymm2 +vmovapd %ymm0, (%rax) +vmovapd (%rax), %ymm2 + +vmovaps %xmm0, %xmm2 +vmovaps %xmm0, (%rax) +vmovaps (%rax), %xmm2 + +vmovaps %ymm0, %ymm2 +vmovaps %ymm0, (%rax) +vmovaps (%rax), %ymm2 + +vmovd %eax, %xmm2 +vmovd (%rax), %xmm2 + +vmovd %xmm0, %ecx +vmovd %xmm0, (%rax) + +vmovddup %xmm0, %xmm2 +vmovddup (%rax), %xmm2 + +vmovddup %ymm0, %ymm2 +vmovddup (%rax), %ymm2 + +vmovdqa %xmm0, %xmm2 +vmovdqa %xmm0, (%rax) +vmovdqa (%rax), %xmm2 + +vmovdqa %ymm0, %ymm2 +vmovdqa %ymm0, (%rax) +vmovdqa (%rax), %ymm2 + +vmovdqu %xmm0, %xmm2 +vmovdqu %xmm0, (%rax) +vmovdqu (%rax), %xmm2 + +vmovdqu %ymm0, %ymm2 +vmovdqu %ymm0, (%rax) +vmovdqu (%rax), %ymm2 + +vmovhlps %xmm0, %xmm1, %xmm2 +vmovlhps %xmm0, %xmm1, %xmm2 + +vmovhpd %xmm0, (%rax) +vmovhpd (%rax), %xmm1, %xmm2 + +vmovhps %xmm0, (%rax) +vmovhps (%rax), %xmm1, %xmm2 + +vmovlpd %xmm0, (%rax) +vmovlpd (%rax), %xmm1, %xmm2 + +vmovlps %xmm0, (%rax) +vmovlps (%rax), %xmm1, %xmm2 + +vmovmskpd %xmm0, %rcx +vmovmskpd %ymm0, %rcx + +vmovmskps %xmm0, %rcx +vmovmskps %ymm0, %rcx + +vmovntdq %xmm0, (%rax) +vmovntdq %ymm0, (%rax) + +vmovntdqa (%rax), %xmm2 +vmovntdqa (%rax), %ymm2 + +vmovntpd %xmm0, (%rax) +vmovntpd %ymm0, (%rax) + +vmovntps %xmm0, (%rax) +vmovntps %ymm0, (%rax) + +vmovq %xmm0, %xmm2 + +vmovq %rax, %xmm2 +vmovq (%rax), %xmm2 + +vmovq %xmm0, %rcx +vmovq %xmm0, (%rax) + +vmovsd %xmm0, %xmm1, %xmm2 +vmovsd %xmm0, (%rax) +vmovsd (%rax), %xmm2 + +vmovshdup %xmm0, %xmm2 +vmovshdup (%rax), %xmm2 + +vmovshdup 
%ymm0, %ymm2 +vmovshdup (%rax), %ymm2 + +vmovsldup %xmm0, %xmm2 +vmovsldup (%rax), %xmm2 + +vmovsldup %ymm0, %ymm2 +vmovsldup (%rax), %ymm2 + +vmovss %xmm0, %xmm1, %xmm2 +vmovss %xmm0, (%rax) +vmovss (%rax), %xmm2 + +vmovupd %xmm0, %xmm2 +vmovupd %xmm0, (%rax) +vmovupd (%rax), %xmm2 + +vmovupd %ymm0, %ymm2 +vmovupd %ymm0, (%rax) +vmovupd (%rax), %ymm2 + +vmovups %xmm0, %xmm2 +vmovups %xmm0, (%rax) +vmovups (%rax), %xmm2 + +vmovups %ymm0, %ymm2 +vmovups %ymm0, (%rax) +vmovups (%rax), %ymm2 + +vmpsadbw $1, %xmm0, %xmm1, %xmm2 +vmpsadbw $1, (%rax), %xmm1, %xmm2 + +vmulpd %xmm0, %xmm1, %xmm2 +vmulpd (%rax), %xmm1, %xmm2 + +vmulpd %ymm0, %ymm1, %ymm2 +vmulpd (%rax), %ymm1, %ymm2 + +vmulps %xmm0, %xmm1, %xmm2 +vmulps (%rax), %xmm1, %xmm2 + +vmulps %ymm0, %ymm1, %ymm2 +vmulps (%rax), %ymm1, %ymm2 + +vmulsd %xmm0, %xmm1, %xmm2 +vmulsd (%rax), %xmm1, %xmm2 + +vmulss %xmm0, %xmm1, %xmm2 +vmulss (%rax), %xmm1, %xmm2 + +vorpd %xmm0, %xmm1, %xmm2 +vorpd (%rax), %xmm1, %xmm2 + +vorpd %ymm0, %ymm1, %ymm2 +vorpd (%rax), %ymm1, %ymm2 + +vorps %xmm0, %xmm1, %xmm2 +vorps (%rax), %xmm1, %xmm2 + +vorps %ymm0, %ymm1, %ymm2 +vorps (%rax), %ymm1, %ymm2 + +vpabsb %xmm0, %xmm2 +vpabsb (%rax), %xmm2 + +vpabsd %xmm0, %xmm2 +vpabsd (%rax), %xmm2 + +vpabsw %xmm0, %xmm2 +vpabsw (%rax), %xmm2 + +vpackssdw %xmm0, %xmm1, %xmm2 +vpackssdw (%rax), %xmm1, %xmm2 + +vpacksswb %xmm0, %xmm1, %xmm2 +vpacksswb (%rax), %xmm1, %xmm2 + +vpackusdw %xmm0, %xmm1, %xmm2 +vpackusdw (%rax), %xmm1, %xmm2 + +vpackuswb %xmm0, %xmm1, %xmm2 +vpackuswb (%rax), %xmm1, %xmm2 + +vpaddb %xmm0, %xmm1, %xmm2 +vpaddb (%rax), %xmm1, %xmm2 + +vpaddd %xmm0, %xmm1, %xmm2 +vpaddd (%rax), %xmm1, %xmm2 + +vpaddq %xmm0, %xmm1, %xmm2 +vpaddq (%rax), %xmm1, %xmm2 + +vpaddsb %xmm0, %xmm1, %xmm2 +vpaddsb (%rax), %xmm1, %xmm2 + +vpaddsw %xmm0, %xmm1, %xmm2 +vpaddsw (%rax), %xmm1, %xmm2 + +vpaddusb %xmm0, %xmm1, %xmm2 +vpaddusb (%rax), %xmm1, %xmm2 + +vpaddusw %xmm0, %xmm1, %xmm2 +vpaddusw (%rax), %xmm1, %xmm2 + +vpaddw %xmm0, %xmm1, %xmm2 
+vpaddw (%rax), %xmm1, %xmm2 + +vpalignr $1, %xmm0, %xmm1, %xmm2 +vpalignr $1, (%rax), %xmm1, %xmm2 + +vpand %xmm0, %xmm1, %xmm2 +vpand (%rax), %xmm1, %xmm2 + +vpandn %xmm0, %xmm1, %xmm2 +vpandn (%rax), %xmm1, %xmm2 + +vpavgb %xmm0, %xmm1, %xmm2 +vpavgb (%rax), %xmm1, %xmm2 + +vpavgw %xmm0, %xmm1, %xmm2 +vpavgw (%rax), %xmm1, %xmm2 + +vpblendvb %xmm3, %xmm0, %xmm1, %xmm2 +vpblendvb %xmm3, (%rax), %xmm1, %xmm2 + +vpblendw $11, %xmm0, %xmm1, %xmm2 +vpblendw $11, (%rax), %xmm1, %xmm2 + +vpclmulqdq $11, %xmm0, %xmm1, %xmm2 +vpclmulqdq $11, (%rax), %xmm1, %xmm2 + +vpcmpeqb %xmm0, %xmm1, %xmm2 +vpcmpeqb (%rax), %xmm1, %xmm2 + +vpcmpeqd %xmm0, %xmm1, %xmm2 +vpcmpeqd (%rax), %xmm1, %xmm2 + +vpcmpeqq %xmm0, %xmm1, %xmm2 +vpcmpeqq (%rax), %xmm1, %xmm2 + +vpcmpeqw %xmm0, %xmm1, %xmm2 +vpcmpeqw (%rax), %xmm1, %xmm2 + +vpcmpestri $1, %xmm0, %xmm2 +vpcmpestri $1, (%rax), %xmm2 + +vpcmpestrm $1, %xmm0, %xmm2 +vpcmpestrm $1, (%rax), %xmm2 + +vpcmpgtb %xmm0, %xmm1, %xmm2 +vpcmpgtb (%rax), %xmm1, %xmm2 + +vpcmpgtd %xmm0, %xmm1, %xmm2 +vpcmpgtd (%rax), %xmm1, %xmm2 + +vpcmpgtq %xmm0, %xmm1, %xmm2 +vpcmpgtq (%rax), %xmm1, %xmm2 + +vpcmpgtw %xmm0, %xmm1, %xmm2 +vpcmpgtw (%rax), %xmm1, %xmm2 + +vpcmpistri $1, %xmm0, %xmm2 +vpcmpistri $1, (%rax), %xmm2 + +vpcmpistrm $1, %xmm0, %xmm2 +vpcmpistrm $1, (%rax), %xmm2 + +vperm2f128 $1, %ymm0, %ymm1, %ymm2 +vperm2f128 $1, (%rax), %ymm1, %ymm2 + +vpermilpd $1, %xmm0, %xmm2 +vpermilpd $1, (%rax), %xmm2 +vpermilpd %xmm0, %xmm1, %xmm2 +vpermilpd (%rax), %xmm1, %xmm2 + +vpermilpd $1, %ymm0, %ymm2 +vpermilpd $1, (%rax), %ymm2 +vpermilpd %ymm0, %ymm1, %ymm2 +vpermilpd (%rax), %ymm1, %ymm2 + +vpermilps $1, %xmm0, %xmm2 +vpermilps $1, (%rax), %xmm2 +vpermilps %xmm0, %xmm1, %xmm2 +vpermilps (%rax), %xmm1, %xmm2 + +vpermilps $1, %ymm0, %ymm2 +vpermilps $1, (%rax), %ymm2 +vpermilps %ymm0, %ymm1, %ymm2 +vpermilps (%rax), %ymm1, %ymm2 + +vpextrb $1, %xmm0, %ecx +vpextrb $1, %xmm0, (%rax) + +vpextrd $1, %xmm0, %ecx +vpextrd $1, %xmm0, (%rax) + +vpextrq $1, 
%xmm0, %rcx +vpextrq $1, %xmm0, (%rax) + +vpextrw $1, %xmm0, %ecx +vpextrw $1, %xmm0, (%rax) + +vphaddd %xmm0, %xmm1, %xmm2 +vphaddd (%rax), %xmm1, %xmm2 + +vphaddsw %xmm0, %xmm1, %xmm2 +vphaddsw (%rax), %xmm1, %xmm2 + +vphaddw %xmm0, %xmm1, %xmm2 +vphaddw (%rax), %xmm1, %xmm2 + +vphminposuw %xmm0, %xmm2 +vphminposuw (%rax), %xmm2 + +vphsubd %xmm0, %xmm1, %xmm2 +vphsubd (%rax), %xmm1, %xmm2 + +vphsubsw %xmm0, %xmm1, %xmm2 +vphsubsw (%rax), %xmm1, %xmm2 + +vphsubw %xmm0, %xmm1, %xmm2 +vphsubw (%rax), %xmm1, %xmm2 + +vpinsrb $1, %eax, %xmm1, %xmm2 +vpinsrb $1, (%rax), %xmm1, %xmm2 + +vpinsrd $1, %eax, %xmm1, %xmm2 +vpinsrd $1, (%rax), %xmm1, %xmm2 + +vpinsrq $1, %rax, %xmm1, %xmm2 +vpinsrq $1, (%rax), %xmm1, %xmm2 + +vpinsrw $1, %eax, %xmm1, %xmm2 +vpinsrw $1, (%rax), %xmm1, %xmm2 + +vpmaddubsw %xmm0, %xmm1, %xmm2 +vpmaddubsw (%rax), %xmm1, %xmm2 + +vpmaddwd %xmm0, %xmm1, %xmm2 +vpmaddwd (%rax), %xmm1, %xmm2 + +vpmaxsb %xmm0, %xmm1, %xmm2 +vpmaxsb (%rax), %xmm1, %xmm2 + +vpmaxsd %xmm0, %xmm1, %xmm2 +vpmaxsd (%rax), %xmm1, %xmm2 + +vpmaxsw %xmm0, %xmm1, %xmm2 +vpmaxsw (%rax), %xmm1, %xmm2 + +vpmaxub %xmm0, %xmm1, %xmm2 +vpmaxub (%rax), %xmm1, %xmm2 + +vpmaxud %xmm0, %xmm1, %xmm2 +vpmaxud (%rax), %xmm1, %xmm2 + +vpmaxuw %xmm0, %xmm1, %xmm2 +vpmaxuw (%rax), %xmm1, %xmm2 + +vpminsb %xmm0, %xmm1, %xmm2 +vpminsb (%rax), %xmm1, %xmm2 + +vpminsd %xmm0, %xmm1, %xmm2 +vpminsd (%rax), %xmm1, %xmm2 + +vpminsw %xmm0, %xmm1, %xmm2 +vpminsw (%rax), %xmm1, %xmm2 + +vpminub %xmm0, %xmm1, %xmm2 +vpminub (%rax), %xmm1, %xmm2 + +vpminud %xmm0, %xmm1, %xmm2 +vpminud (%rax), %xmm1, %xmm2 + +vpminuw %xmm0, %xmm1, %xmm2 +vpminuw (%rax), %xmm1, %xmm2 + +vpmovmskb %xmm0, %rcx + +vpmovsxbd %xmm0, %xmm2 +vpmovsxbd (%rax), %xmm2 + +vpmovsxbq %xmm0, %xmm2 +vpmovsxbq (%rax), %xmm2 + +vpmovsxbw %xmm0, %xmm2 +vpmovsxbw (%rax), %xmm2 + +vpmovsxdq %xmm0, %xmm2 +vpmovsxdq (%rax), %xmm2 + +vpmovsxwd %xmm0, %xmm2 +vpmovsxwd (%rax), %xmm2 + +vpmovsxwq %xmm0, %xmm2 +vpmovsxwq (%rax), %xmm2 + +vpmovzxbd 
%xmm0, %xmm2 +vpmovzxbd (%rax), %xmm2 + +vpmovzxbq %xmm0, %xmm2 +vpmovzxbq (%rax), %xmm2 + +vpmovzxbw %xmm0, %xmm2 +vpmovzxbw (%rax), %xmm2 + +vpmovzxdq %xmm0, %xmm2 +vpmovzxdq (%rax), %xmm2 + +vpmovzxwd %xmm0, %xmm2 +vpmovzxwd (%rax), %xmm2 + +vpmovzxwq %xmm0, %xmm2 +vpmovzxwq (%rax), %xmm2 + +vpmuldq %xmm0, %xmm1, %xmm2 +vpmuldq (%rax), %xmm1, %xmm2 + +vpmulhrsw %xmm0, %xmm1, %xmm2 +vpmulhrsw (%rax), %xmm1, %xmm2 + +vpmulhuw %xmm0, %xmm1, %xmm2 +vpmulhuw (%rax), %xmm1, %xmm2 + +vpmulhw %xmm0, %xmm1, %xmm2 +vpmulhw (%rax), %xmm1, %xmm2 + +vpmulld %xmm0, %xmm1, %xmm2 +vpmulld (%rax), %xmm1, %xmm2 + +vpmullw %xmm0, %xmm1, %xmm2 +vpmullw (%rax), %xmm1, %xmm2 + +vpmuludq %xmm0, %xmm1, %xmm2 +vpmuludq (%rax), %xmm1, %xmm2 + +vpor %xmm0, %xmm1, %xmm2 +vpor (%rax), %xmm1, %xmm2 + +vpsadbw %xmm0, %xmm1, %xmm2 +vpsadbw (%rax), %xmm1, %xmm2 + +vpshufb %xmm0, %xmm1, %xmm2 +vpshufb (%rax), %xmm1, %xmm2 + +vpshufd $1, %xmm0, %xmm2 +vpshufd $1, (%rax), %xmm2 + +vpshufhw $1, %xmm0, %xmm2 +vpshufhw $1, (%rax), %xmm2 + +vpshuflw $1, %xmm0, %xmm2 +vpshuflw $1, (%rax), %xmm2 + +vpsignb %xmm0, %xmm1, %xmm2 +vpsignb (%rax), %xmm1, %xmm2 + +vpsignd %xmm0, %xmm1, %xmm2 +vpsignd (%rax), %xmm1, %xmm2 + +vpsignw %xmm0, %xmm1, %xmm2 +vpsignw (%rax), %xmm1, %xmm2 + +vpslld $1, %xmm0, %xmm2 +vpslld %xmm0, %xmm1, %xmm2 +vpslld (%rax), %xmm1, %xmm2 + +vpslldq $1, %xmm1, %xmm2 + +vpsllq $1, %xmm0, %xmm2 +vpsllq %xmm0, %xmm1, %xmm2 +vpsllq (%rax), %xmm1, %xmm2 + +vpsllw $1, %xmm0, %xmm2 +vpsllw %xmm0, %xmm1, %xmm2 +vpsllw (%rax), %xmm1, %xmm2 + +vpsrad $1, %xmm0, %xmm2 +vpsrad %xmm0, %xmm1, %xmm2 +vpsrad (%rax), %xmm1, %xmm2 + +vpsraw $1, %xmm0, %xmm2 +vpsraw %xmm0, %xmm1, %xmm2 +vpsraw (%rax), %xmm1, %xmm2 + +vpsrld $1, %xmm0, %xmm2 +vpsrld %xmm0, %xmm1, %xmm2 +vpsrld (%rax), %xmm1, %xmm2 + +vpsrldq $1, %xmm1, %xmm2 + +vpsrlq $1, %xmm0, %xmm2 +vpsrlq %xmm0, %xmm1, %xmm2 +vpsrlq (%rax), %xmm1, %xmm2 + +vpsrlw $1, %xmm0, %xmm2 +vpsrlw %xmm0, %xmm1, %xmm2 +vpsrlw (%rax), %xmm1, %xmm2 + +vpsubb 
%xmm0, %xmm1, %xmm2 +vpsubb (%rax), %xmm1, %xmm2 + +vpsubd %xmm0, %xmm1, %xmm2 +vpsubd (%rax), %xmm1, %xmm2 + +vpsubq %xmm0, %xmm1, %xmm2 +vpsubq (%rax), %xmm1, %xmm2 + +vpsubsb %xmm0, %xmm1, %xmm2 +vpsubsb (%rax), %xmm1, %xmm2 + +vpsubsw %xmm0, %xmm1, %xmm2 +vpsubsw (%rax), %xmm1, %xmm2 + +vpsubusb %xmm0, %xmm1, %xmm2 +vpsubusb (%rax), %xmm1, %xmm2 + +vpsubusw %xmm0, %xmm1, %xmm2 +vpsubusw (%rax), %xmm1, %xmm2 + +vpsubw %xmm0, %xmm1, %xmm2 +vpsubw (%rax), %xmm1, %xmm2 + +vptest %xmm0, %xmm1 +vptest (%rax), %xmm1 + +vptest %ymm0, %ymm1 +vptest (%rax), %ymm1 + +vpunpckhbw %xmm0, %xmm1, %xmm2 +vpunpckhbw (%rax), %xmm1, %xmm2 + +vpunpckhdq %xmm0, %xmm1, %xmm2 +vpunpckhdq (%rax), %xmm1, %xmm2 + +vpunpckhqdq %xmm0, %xmm1, %xmm2 +vpunpckhqdq (%rax), %xmm1, %xmm2 + +vpunpckhwd %xmm0, %xmm1, %xmm2 +vpunpckhwd (%rax), %xmm1, %xmm2 + +vpunpcklbw %xmm0, %xmm1, %xmm2 +vpunpcklbw (%rax), %xmm1, %xmm2 + +vpunpckldq %xmm0, %xmm1, %xmm2 +vpunpckldq (%rax), %xmm1, %xmm2 + +vpunpcklqdq %xmm0, %xmm1, %xmm2 +vpunpcklqdq (%rax), %xmm1, %xmm2 + +vpunpcklwd %xmm0, %xmm1, %xmm2 +vpunpcklwd (%rax), %xmm1, %xmm2 + +vpxor %xmm0, %xmm1, %xmm2 +vpxor (%rax), %xmm1, %xmm2 + +vrcpps %xmm0, %xmm2 +vrcpps (%rax), %xmm2 + +vrcpps %ymm0, %ymm2 +vrcpps (%rax), %ymm2 + +vrcpss %xmm0, %xmm1, %xmm2 +vrcpss (%rax), %xmm1, %xmm2 + +vroundpd $1, %xmm0, %xmm2 +vroundpd $1, (%rax), %xmm2 + +vroundpd $1, %ymm0, %ymm2 +vroundpd $1, (%rax), %ymm2 + +vroundps $1, %xmm0, %xmm2 +vroundps $1, (%rax), %xmm2 + +vroundps $1, %ymm0, %ymm2 +vroundps $1, (%rax), %ymm2 + +vroundsd $1, %xmm0, %xmm1, %xmm2 +vroundsd $1, (%rax), %xmm1, %xmm2 + +vroundss $1, %xmm0, %xmm1, %xmm2 +vroundss $1, (%rax), %xmm1, %xmm2 + +vrsqrtps %xmm0, %xmm2 +vrsqrtps (%rax), %xmm2 + +vrsqrtps %ymm0, %ymm2 +vrsqrtps (%rax), %ymm2 + +vrsqrtss %xmm0, %xmm1, %xmm2 +vrsqrtss (%rax), %xmm1, %xmm2 + +vshufpd $1, %xmm0, %xmm1, %xmm2 +vshufpd $1, (%rax), %xmm1, %xmm2 + +vshufpd $1, %ymm0, %ymm1, %ymm2 +vshufpd $1, (%rax), %ymm1, %ymm2 + +vshufps $1, 
%xmm0, %xmm1, %xmm2 +vshufps $1, (%rax), %xmm1, %xmm2 + +vshufps $1, %ymm0, %ymm1, %ymm2 +vshufps $1, (%rax), %ymm1, %ymm2 + +vsqrtpd %xmm0, %xmm2 +vsqrtpd (%rax), %xmm2 + +vsqrtpd %ymm0, %ymm2 +vsqrtpd (%rax), %ymm2 + +vsqrtps %xmm0, %xmm2 +vsqrtps (%rax), %xmm2 + +vsqrtps %ymm0, %ymm2 +vsqrtps (%rax), %ymm2 + +vsqrtsd %xmm0, %xmm1, %xmm2 +vsqrtsd (%rax), %xmm1, %xmm2 + +vsqrtss %xmm0, %xmm1, %xmm2 +vsqrtss (%rax), %xmm1, %xmm2 + +vstmxcsr (%rax) + +vsubpd %xmm0, %xmm1, %xmm2 +vsubpd (%rax), %xmm1, %xmm2 + +vsubpd %ymm0, %ymm1, %ymm2 +vsubpd (%rax), %ymm1, %ymm2 + +vsubps %xmm0, %xmm1, %xmm2 +vsubps (%rax), %xmm1, %xmm2 + +vsubps %ymm0, %ymm1, %ymm2 +vsubps (%rax), %ymm1, %ymm2 + +vsubsd %xmm0, %xmm1, %xmm2 +vsubsd (%rax), %xmm1, %xmm2 + +vsubss %xmm0, %xmm1, %xmm2 +vsubss (%rax), %xmm1, %xmm2 + +vtestpd %xmm0, %xmm1 +vtestpd (%rax), %xmm1 + +vtestpd %ymm0, %ymm1 +vtestpd (%rax), %ymm1 + +vtestps %xmm0, %xmm1 +vtestps (%rax), %xmm1 + +vtestps %ymm0, %ymm1 +vtestps (%rax), %ymm1 + +vucomisd %xmm0, %xmm1 +vucomisd (%rax), %xmm1 + +vucomiss %xmm0, %xmm1 +vucomiss (%rax), %xmm1 + +vunpckhpd %xmm0, %xmm1, %xmm2 +vunpckhpd (%rax), %xmm1, %xmm2 + +vunpckhpd %ymm0, %ymm1, %ymm2 +vunpckhpd (%rax), %ymm1, %ymm2 + +vunpckhps %xmm0, %xmm1, %xmm2 +vunpckhps (%rax), %xmm1, %xmm2 + +vunpckhps %ymm0, %ymm1, %ymm2 +vunpckhps (%rax), %ymm1, %ymm2 + +vunpcklpd %xmm0, %xmm1, %xmm2 +vunpcklpd (%rax), %xmm1, %xmm2 + +vunpcklpd %ymm0, %ymm1, %ymm2 +vunpcklpd (%rax), %ymm1, %ymm2 + +vunpcklps %xmm0, %xmm1, %xmm2 +vunpcklps (%rax), %xmm1, %xmm2 + +vunpcklps %ymm0, %ymm1, %ymm2 +vunpcklps (%rax), %ymm1, %ymm2 + +vxorpd %xmm0, %xmm1, %xmm2 +vxorpd (%rax), %xmm1, %xmm2 + +vxorpd %ymm0, %ymm1, %ymm2 +vxorpd (%rax), %ymm1, %ymm2 + +vxorps %xmm0, %xmm1, %xmm2 +vxorps (%rax), %xmm1, %xmm2 + +vxorps %ymm0, %ymm1, %ymm2 +vxorps (%rax), %ymm1, %ymm2 + +vzeroall +vzeroupper + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# 
CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 0.50 vaddpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vaddpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.50 vaddpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 10 0.50 * vaddpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 0.50 vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vaddps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.50 vaddps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 10 0.50 * vaddps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 0.50 vaddsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vaddsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.50 vaddss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vaddss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.50 vaddsubpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vaddsubpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.50 vaddsubpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 10 0.50 * vaddsubpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 0.50 vaddsubps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vaddsubps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.50 vaddsubps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 10 0.50 * vaddsubps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vaesdec %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 0.50 * vaesdec (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vaesdeclast %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 0.50 * vaesdeclast (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vaesenc %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 0.50 * vaesenc (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vaesenclast %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 0.50 * vaesenclast (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vaesimc %xmm0, %xmm2 +# CHECK-NEXT: 1 11 0.50 * vaesimc (%rax), %xmm2 +# CHECK-NEXT: 1 4 0.50 vaeskeygenassist $22, %xmm0, %xmm2 +# CHECK-NEXT: 1 11 0.50 * vaeskeygenassist $22, (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 vandnpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vandnpd (%rax), %xmm1, %xmm2 
+# CHECK-NEXT: 1 1 0.25 vandnpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vandnpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.25 vandnps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vandnps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vandnps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vandnps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.25 vandpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vandpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vandpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vandpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.25 vandps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vandps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vandps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vandps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vblendpd $11, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vblendpd $11, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vblendpd $11, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vblendpd $11, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vblendps $11, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vblendps $11, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vblendps $11, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vblendps $11, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vblendvpd %xmm3, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vblendvpd %xmm3, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vblendvpd %ymm3, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vblendvpd %ymm3, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vblendvps %xmm3, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vblendvps %xmm3, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vblendvps %ymm3, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vblendvps %ymm3, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vbroadcastf128 (%rax), %ymm2 +# CHECK-NEXT: 1 8 0.50 * vbroadcastsd (%rax), %ymm2 +# CHECK-NEXT: 1 8 0.50 * vbroadcastss (%rax), %xmm2 +# CHECK-NEXT: 1 8 0.50 * vbroadcastss (%rax), %ymm2 +# CHECK-NEXT: 1 1 0.50 vcmpeqpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 
1 8 0.50 * vcmpeqpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vcmpeqpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vcmpeqpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vcmpeqps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vcmpeqps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vcmpeqps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vcmpeqps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vcmpeqsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vcmpeqsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vcmpeqss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vcmpeqss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 2 4 1.00 vcomisd %xmm0, %xmm1 +# CHECK-NEXT: 2 11 1.00 * vcomisd (%rax), %xmm1 +# CHECK-NEXT: 2 4 1.00 vcomiss %xmm0, %xmm1 +# CHECK-NEXT: 2 11 1.00 * vcomiss (%rax), %xmm1 +# CHECK-NEXT: 1 3 0.50 vcvtdq2pd %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vcvtdq2pd (%rax), %xmm2 +# CHECK-NEXT: 2 4 1.00 vcvtdq2pd %xmm0, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vcvtdq2pd (%rax), %ymm2 +# CHECK-NEXT: 1 3 0.50 vcvtdq2ps %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vcvtdq2ps (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 vcvtdq2ps %ymm0, %ymm2 +# CHECK-NEXT: 1 10 0.50 * vcvtdq2ps (%rax), %ymm2 +# CHECK-NEXT: 1 3 0.50 vcvtpd2dq %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vcvtpd2dqx (%rax), %xmm2 +# CHECK-NEXT: 2 6 1.00 vcvtpd2dq %ymm0, %xmm2 +# CHECK-NEXT: 2 13 1.00 * vcvtpd2dqy (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 vcvtpd2ps %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vcvtpd2psx (%rax), %xmm2 +# CHECK-NEXT: 2 6 1.00 vcvtpd2ps %ymm0, %xmm2 +# CHECK-NEXT: 2 13 1.00 * vcvtpd2psy (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 vcvtps2dq %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vcvtps2dq (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 vcvtps2dq %ymm0, %ymm2 +# CHECK-NEXT: 1 10 0.50 * vcvtps2dq (%rax), %ymm2 +# CHECK-NEXT: 1 3 0.50 vcvtps2pd %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vcvtps2pd (%rax), %xmm2 +# CHECK-NEXT: 2 4 1.00 vcvtps2pd %xmm0, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vcvtps2pd (%rax), %ymm2 +# CHECK-NEXT: 2 2 1.00 vcvtsd2si %xmm0, %ecx 
+# CHECK-NEXT: 2 2 1.00 vcvtsd2si %xmm0, %rcx +# CHECK-NEXT: 2 9 1.00 * vcvtsd2si (%rax), %ecx +# CHECK-NEXT: 2 9 1.00 * vcvtsd2si (%rax), %rcx +# CHECK-NEXT: 1 3 0.50 vcvtsd2ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vcvtsd2ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 2 4 1.00 vcvtsi2sd %ecx, %xmm0, %xmm2 +# CHECK-NEXT: 2 4 1.00 vcvtsi2sd %rcx, %xmm0, %xmm2 +# CHECK-NEXT: 1 10 1.00 * vcvtsi2sdl (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 1 10 1.00 * vcvtsi2sdq (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 2 4 1.00 vcvtsi2ss %ecx, %xmm0, %xmm2 +# CHECK-NEXT: 2 4 1.00 vcvtsi2ss %rcx, %xmm0, %xmm2 +# CHECK-NEXT: 1 10 1.00 * vcvtsi2ssl (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 1 10 1.00 * vcvtsi2ssq (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 1 3 0.50 vcvtss2sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vcvtss2sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 2 2 1.00 vcvtss2si %xmm0, %ecx +# CHECK-NEXT: 2 2 1.00 vcvtss2si %xmm0, %rcx +# CHECK-NEXT: 2 9 1.00 * vcvtss2si (%rax), %ecx +# CHECK-NEXT: 2 9 1.00 * vcvtss2si (%rax), %rcx +# CHECK-NEXT: 1 3 0.50 vcvttpd2dq %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vcvttpd2dqx (%rax), %xmm2 +# CHECK-NEXT: 2 6 1.00 vcvttpd2dq %ymm0, %xmm2 +# CHECK-NEXT: 2 13 1.00 * vcvttpd2dqy (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 vcvttps2dq %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vcvttps2dq (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 vcvttps2dq %ymm0, %ymm2 +# CHECK-NEXT: 1 10 0.50 * vcvttps2dq (%rax), %ymm2 +# CHECK-NEXT: 2 2 1.00 vcvttsd2si %xmm0, %ecx +# CHECK-NEXT: 2 2 1.00 vcvttsd2si %xmm0, %rcx +# CHECK-NEXT: 2 9 1.00 * vcvttsd2si (%rax), %ecx +# CHECK-NEXT: 2 9 1.00 * vcvttsd2si (%rax), %rcx +# CHECK-NEXT: 2 2 1.00 vcvttss2si %xmm0, %ecx +# CHECK-NEXT: 2 2 1.00 vcvttss2si %xmm0, %rcx +# CHECK-NEXT: 2 9 1.00 * vcvttss2si (%rax), %ecx +# CHECK-NEXT: 2 9 1.00 * vcvttss2si (%rax), %rcx +# CHECK-NEXT: 1 13 5.00 vdivpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 20 5.00 * vdivpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 13 5.00 vdivpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 20 5.00 * vdivpd (%rax), 
%ymm1, %ymm2 +# CHECK-NEXT: 1 11 3.00 vdivps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 18 3.00 * vdivps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 11 3.00 vdivps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 18 3.00 * vdivps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 13 5.00 vdivsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 20 5.00 * vdivsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 11 3.00 vdivss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 18 3.00 * vdivss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 3 9 3.00 vdppd $22, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 5 16 3.00 * vdppd $22, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 8 15 4.00 vdpps $22, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 10 22 4.00 * vdpps $22, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 7 15 4.00 vdpps $22, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 8 22 4.00 * vdpps $22, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vextractf128 $1, %ymm0, %xmm2 +# CHECK-NEXT: 2 8 1.00 * vextractf128 $1, %ymm0, (%rax) +# CHECK-NEXT: 2 1 1.00 vextractps $1, %xmm0, %ecx +# CHECK-NEXT: 2 2 1.00 * vextractps $1, %xmm0, (%rax) +# CHECK-NEXT: 4 6 2.00 vhaddpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 4 13 2.00 * vhaddpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 3 6 2.00 vhaddpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 4 13 2.00 * vhaddpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 4 6 2.00 vhaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 4 13 2.00 * vhaddps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 3 6 2.00 vhaddps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 4 13 2.00 * vhaddps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 4 6 2.00 vhsubpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 4 13 2.00 * vhsubpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 3 6 2.00 vhsubpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 4 13 2.00 * vhsubpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 4 6 2.00 vhsubps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 4 13 2.00 * vhsubps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 3 6 2.00 vhsubps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 4 13 2.00 * vhsubps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 2 1.00 vinsertf128 $1, %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 1.00 * vinsertf128 $1, (%rax), %ymm1, %ymm2 +# 
CHECK-NEXT: 1 1 0.50 vinsertps $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vinsertps $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vlddqu (%rax), %xmm2 +# CHECK-NEXT: 1 8 0.50 * vlddqu (%rax), %ymm2 +# CHECK-NEXT: 1 5 1.50 * * U vldmxcsr (%rax) +# CHECK-NEXT: 1 1 1.00 * * U vmaskmovdqu %xmm0, %xmm1 +# CHECK-NEXT: 1 8 0.50 * vmaskmovpd (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vmaskmovpd (%rax), %ymm0, %ymm2 +# CHECK-NEXT: 10 1 4.00 * * vmaskmovpd %xmm0, %xmm1, (%rax) +# CHECK-NEXT: 18 1 6.00 * * vmaskmovpd %ymm0, %ymm1, (%rax) +# CHECK-NEXT: 1 8 0.50 * vmaskmovps (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vmaskmovps (%rax), %ymm0, %ymm2 +# CHECK-NEXT: 18 1 6.00 * * vmaskmovps %xmm0, %xmm1, (%rax) +# CHECK-NEXT: 42 1 12.00 * * vmaskmovps %ymm0, %ymm1, (%rax) +# CHECK-NEXT: 1 1 0.50 vmaxpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vmaxpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vmaxpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vmaxpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vmaxps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vmaxps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vmaxps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vmaxps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vmaxsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vmaxsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vmaxss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vmaxss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vminpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vminpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vminpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vminpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vminps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vminps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vminps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vminps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vminsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vminsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vminss %xmm0, %xmm1, %xmm2 +# 
CHECK-NEXT: 1 8 0.50 * vminss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 0 0.25 vmovapd %xmm0, %xmm2 +# CHECK-NEXT: 1 1 1.00 * vmovapd %xmm0, (%rax) +# CHECK-NEXT: 1 8 0.50 * vmovapd (%rax), %xmm2 +# CHECK-NEXT: 1 0 0.25 vmovapd %ymm0, %ymm2 +# CHECK-NEXT: 1 1 1.00 * vmovapd %ymm0, (%rax) +# CHECK-NEXT: 1 8 0.50 * vmovapd (%rax), %ymm2 +# CHECK-NEXT: 1 0 0.25 vmovaps %xmm0, %xmm2 +# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, (%rax) +# CHECK-NEXT: 1 8 0.50 * vmovaps (%rax), %xmm2 +# CHECK-NEXT: 1 0 0.25 vmovaps %ymm0, %ymm2 +# CHECK-NEXT: 1 1 1.00 * vmovaps %ymm0, (%rax) +# CHECK-NEXT: 1 8 0.50 * vmovaps (%rax), %ymm2 +# CHECK-NEXT: 1 1 1.00 vmovd %eax, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vmovd (%rax), %xmm2 +# CHECK-NEXT: 1 1 1.00 vmovd %xmm0, %ecx +# CHECK-NEXT: 1 1 1.00 * vmovd %xmm0, (%rax) +# CHECK-NEXT: 1 1 0.50 vmovddup %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vmovddup (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 vmovddup %ymm0, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vmovddup (%rax), %ymm2 +# CHECK-NEXT: 1 0 0.25 vmovdqa %xmm0, %xmm2 +# CHECK-NEXT: 1 1 1.00 * vmovdqa %xmm0, (%rax) +# CHECK-NEXT: 1 8 0.50 * vmovdqa (%rax), %xmm2 +# CHECK-NEXT: 1 0 0.25 vmovdqa %ymm0, %ymm2 +# CHECK-NEXT: 1 1 1.00 * vmovdqa %ymm0, (%rax) +# CHECK-NEXT: 1 8 0.50 * vmovdqa (%rax), %ymm2 +# CHECK-NEXT: 1 0 0.25 vmovdqu %xmm0, %xmm2 +# CHECK-NEXT: 1 1 1.00 * vmovdqu %xmm0, (%rax) +# CHECK-NEXT: 1 8 0.50 * vmovdqu (%rax), %xmm2 +# CHECK-NEXT: 1 0 0.25 vmovdqu %ymm0, %ymm2 +# CHECK-NEXT: 1 1 1.00 * vmovdqu %ymm0, (%rax) +# CHECK-NEXT: 1 8 0.50 * vmovdqu (%rax), %ymm2 +# CHECK-NEXT: 1 1 0.50 vmovhlps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vmovlhps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 2 2 1.00 * vmovhpd %xmm0, (%rax) +# CHECK-NEXT: 1 8 0.50 * vmovhpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 2 2 1.00 * vmovhps %xmm0, (%rax) +# CHECK-NEXT: 1 8 0.50 * vmovhps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 1.00 * vmovlpd %xmm0, (%rax) +# CHECK-NEXT: 1 8 0.50 * vmovlpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 1.00 * vmovlps 
%xmm0, (%rax) +# CHECK-NEXT: 1 8 0.50 * vmovlps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 1.00 vmovmskpd %xmm0, %ecx +# CHECK-NEXT: 1 1 1.00 vmovmskpd %ymm0, %ecx +# CHECK-NEXT: 1 1 1.00 vmovmskps %xmm0, %ecx +# CHECK-NEXT: 1 1 1.00 vmovmskps %ymm0, %ecx +# CHECK-NEXT: 1 1 1.00 * vmovntdq %xmm0, (%rax) +# CHECK-NEXT: 1 1 1.00 * vmovntdq %ymm0, (%rax) +# CHECK-NEXT: 1 8 0.50 * vmovntdqa (%rax), %xmm2 +# CHECK-NEXT: 1 8 0.50 * vmovntdqa (%rax), %ymm2 +# CHECK-NEXT: 1 1 1.00 * vmovntpd %xmm0, (%rax) +# CHECK-NEXT: 1 1 1.00 * vmovntpd %ymm0, (%rax) +# CHECK-NEXT: 1 1 1.00 * vmovntps %xmm0, (%rax) +# CHECK-NEXT: 1 1 1.00 * vmovntps %ymm0, (%rax) +# CHECK-NEXT: 1 1 0.25 vmovq %xmm0, %xmm2 +# CHECK-NEXT: 1 1 1.00 vmovq %rax, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vmovq (%rax), %xmm2 +# CHECK-NEXT: 1 1 1.00 vmovq %xmm0, %rcx +# CHECK-NEXT: 1 1 1.00 * vmovq %xmm0, (%rax) +# CHECK-NEXT: 1 1 0.50 vmovsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 1 1.00 * vmovsd %xmm0, (%rax) +# CHECK-NEXT: 1 8 0.50 * vmovsd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 vmovshdup %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vmovshdup (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 vmovshdup %ymm0, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vmovshdup (%rax), %ymm2 +# CHECK-NEXT: 1 1 0.50 vmovsldup %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vmovsldup (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 vmovsldup %ymm0, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vmovsldup (%rax), %ymm2 +# CHECK-NEXT: 1 1 0.50 vmovss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 1 1.00 * vmovss %xmm0, (%rax) +# CHECK-NEXT: 1 8 0.50 * vmovss (%rax), %xmm2 +# CHECK-NEXT: 1 0 0.25 vmovupd %xmm0, %xmm2 +# CHECK-NEXT: 1 1 1.00 * vmovupd %xmm0, (%rax) +# CHECK-NEXT: 1 8 0.50 * vmovupd (%rax), %xmm2 +# CHECK-NEXT: 1 0 0.25 vmovupd %ymm0, %ymm2 +# CHECK-NEXT: 1 1 1.00 * vmovupd %ymm0, (%rax) +# CHECK-NEXT: 1 8 0.50 * vmovupd (%rax), %ymm2 +# CHECK-NEXT: 1 0 0.25 vmovups %xmm0, %xmm2 +# CHECK-NEXT: 1 1 1.00 * vmovups %xmm0, (%rax) +# CHECK-NEXT: 1 8 0.50 * vmovups (%rax), %xmm2 +# CHECK-NEXT: 1 0 0.25 vmovups 
%ymm0, %ymm2 +# CHECK-NEXT: 1 1 1.00 * vmovups %ymm0, (%rax) +# CHECK-NEXT: 1 8 0.50 * vmovups (%rax), %ymm2 +# CHECK-NEXT: 4 4 2.00 vmpsadbw $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 6 11 2.00 * vmpsadbw $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.50 vmulpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vmulpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.50 vmulpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 10 0.50 * vmulpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 0.50 vmulps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vmulps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.50 vmulps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 10 0.50 * vmulps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 0.50 vmulsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vmulsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.50 vmulss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vmulss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vorpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vorpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vorpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vorpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.25 vorps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vorps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vorps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vorps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpabsb %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpabsb (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 vpabsd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpabsd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 vpabsw %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpabsw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 vpackssdw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpackssdw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpacksswb %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpacksswb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpackusdw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpackusdw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpackuswb %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpackuswb (%rax), %xmm1, %xmm2 +# 
CHECK-NEXT: 1 1 0.25 vpaddb %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpaddb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vpaddd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpaddd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vpaddq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpaddq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpaddsb %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpaddsb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpaddsw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpaddsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpaddusb %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpaddusb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpaddusw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpaddusw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vpaddw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpaddw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpalignr $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpalignr $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vpand %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpand (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vpandn %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpandn (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpavgb %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpavgb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpavgw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpavgw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpblendvb %xmm3, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpblendvb %xmm3, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vpblendw $11, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpblendw $11, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 4 4 2.00 vpclmulqdq $11, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 4 11 2.00 * vpclmulqdq $11, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vpcmpeqb %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpcmpeqb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vpcmpeqd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpcmpeqd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 
vpcmpeqq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpcmpeqq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vpcmpeqw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpcmpeqw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 8 6 3.00 vpcmpestri $1, %xmm0, %xmm2 +# CHECK-NEXT: 12 13 3.00 * vpcmpestri $1, (%rax), %xmm2 +# CHECK-NEXT: 7 6 3.00 vpcmpestrm $1, %xmm0, %xmm2 +# CHECK-NEXT: 12 13 3.00 * vpcmpestrm $1, (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 vpcmpgtb %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpcmpgtb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vpcmpgtd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpcmpgtd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vpcmpgtq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpcmpgtq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vpcmpgtw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpcmpgtw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 4 2 2.00 vpcmpistri $1, %xmm0, %xmm2 +# CHECK-NEXT: 4 9 2.00 * vpcmpistri $1, (%rax), %xmm2 +# CHECK-NEXT: 3 6 2.00 vpcmpistrm $1, %xmm0, %xmm2 +# CHECK-NEXT: 4 13 2.00 * vpcmpistrm $1, (%rax), %xmm2 +# CHECK-NEXT: 1 3 1.00 vperm2f128 $1, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 7 1.00 * vperm2f128 $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpermilpd $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpermilpd $1, (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 vpermilpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vpermilpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpermilpd $1, %ymm0, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpermilpd $1, (%rax), %ymm2 +# CHECK-NEXT: 1 3 0.50 vpermilpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 10 0.50 * vpermilpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpermilps $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpermilps $1, (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 vpermilps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vpermilps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpermilps $1, %ymm0, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpermilps $1, (%rax), %ymm2 +# CHECK-NEXT: 1 3 0.50 vpermilps %ymm0, %ymm1, %ymm2 +# 
CHECK-NEXT: 1 10 0.50 * vpermilps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 2 1 1.00 vpextrb $1, %xmm0, %ecx +# CHECK-NEXT: 2 2 1.00 * vpextrb $1, %xmm0, (%rax) +# CHECK-NEXT: 2 1 1.00 vpextrd $1, %xmm0, %ecx +# CHECK-NEXT: 2 2 1.00 * vpextrd $1, %xmm0, (%rax) +# CHECK-NEXT: 2 1 1.00 vpextrq $1, %xmm0, %rcx +# CHECK-NEXT: 2 2 1.00 * vpextrq $1, %xmm0, (%rax) +# CHECK-NEXT: 2 1 1.00 vpextrw $1, %xmm0, %ecx +# CHECK-NEXT: 2 2 1.00 * vpextrw $1, %xmm0, (%rax) +# CHECK-NEXT: 4 2 2.00 vphaddd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 4 9 2.00 * vphaddd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 4 2 2.00 vphaddsw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 4 9 2.00 * vphaddsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 4 2 2.00 vphaddw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 4 9 2.00 * vphaddw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.50 vphminposuw %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vphminposuw (%rax), %xmm2 +# CHECK-NEXT: 4 2 2.00 vphsubd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 4 9 2.00 * vphsubd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 4 2 2.00 vphsubsw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 4 9 2.00 * vphsubsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 4 2 2.00 vphsubw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 4 9 2.00 * vphsubw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 2 2 1.00 vpinsrb $1, %eax, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 1.50 * vpinsrb $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 2 2 1.00 vpinsrd $1, %eax, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 1.50 * vpinsrd $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 2 2 1.00 vpinsrq $1, %rax, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 1.50 * vpinsrq $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 2 2 1.00 vpinsrw $1, %eax, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 1.50 * vpinsrw $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.50 vpmaddubsw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vpmaddubsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.50 vpmaddwd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vpmaddwd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vpmaxsb %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpmaxsb (%rax), %xmm1, %xmm2 +# 
CHECK-NEXT: 1 1 0.25 vpmaxsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpmaxsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vpmaxsw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpmaxsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vpmaxub %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpmaxub (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vpmaxud %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpmaxud (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vpmaxuw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpmaxuw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vpminsb %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpminsb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vpminsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpminsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vpminsw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpminsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vpminub %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpminub (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vpminud %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpminud (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vpminuw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpminuw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 1.00 vpmovmskb %xmm0, %ecx +# CHECK-NEXT: 1 1 0.50 vpmovsxbd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpmovsxbd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 vpmovsxbq %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpmovsxbq (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 vpmovsxbw %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpmovsxbw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 vpmovsxdq %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpmovsxdq (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 vpmovsxwd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpmovsxwd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 vpmovsxwq %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpmovsxwq (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 vpmovzxbd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpmovzxbd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 vpmovzxbq %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpmovzxbq (%rax), %xmm2 +# 
CHECK-NEXT: 1 1 0.50 vpmovzxbw %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpmovzxbw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 vpmovzxdq %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpmovzxdq (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 vpmovzxwd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpmovzxwd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 vpmovzxwq %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpmovzxwq (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 vpmuldq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vpmuldq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.50 vpmulhrsw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vpmulhrsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.50 vpmulhuw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vpmulhuw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.50 vpmulhw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vpmulhw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.50 vpmulld %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vpmulld (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.50 vpmullw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vpmullw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.50 vpmuludq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vpmuludq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vpor %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpor (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.50 vpsadbw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vpsadbw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpshufb %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpshufb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpshufd $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpshufd $1, (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 vpshufhw $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpshufhw $1, (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 vpshuflw $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpshuflw $1, (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 vpsignb %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpsignb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpsignd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpsignd (%rax), %xmm1, %xmm2 +# 
CHECK-NEXT: 1 1 0.50 vpsignw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpsignw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpslld $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpslld %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpslld (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpslldq $1, %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpsllq $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpsllq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpsllq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpsllw $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpsllw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpsllw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpsrad $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpsrad %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpsrad (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpsraw $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpsraw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpsraw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpsrld $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpsrld %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpsrld (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpsrldq $1, %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpsrlq $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpsrlq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpsrlq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpsrlw $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpsrlw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpsrlw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vpsubb %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpsubb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vpsubd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpsubd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vpsubq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpsubq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpsubsb %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpsubsb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpsubsw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpsubsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpsubusb %xmm0, 
%xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpsubusb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpsubusw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpsubusw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vpsubw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpsubw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 2 1 1.00 vptest %xmm0, %xmm1 +# CHECK-NEXT: 2 8 1.00 * vptest (%rax), %xmm1 +# CHECK-NEXT: 2 1 1.00 vptest %ymm0, %ymm1 +# CHECK-NEXT: 2 8 1.00 * vptest (%rax), %ymm1 +# CHECK-NEXT: 1 1 0.50 vpunpckhbw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpunpckhbw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpunpckhdq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpunpckhdq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpunpckhqdq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpunpckhqdq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpunpckhwd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpunpckhwd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpunpcklbw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpunpcklbw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpunpckldq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpunpckldq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpunpcklqdq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpunpcklqdq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpunpcklwd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpunpcklwd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vpxor %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpxor (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.50 vrcpps %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vrcpps (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 vrcpps %ymm0, %ymm2 +# CHECK-NEXT: 1 10 0.50 * vrcpps (%rax), %ymm2 +# CHECK-NEXT: 1 3 0.50 vrcpss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vrcpss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.50 vroundpd $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vroundpd $1, (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 vroundpd $1, %ymm0, %ymm2 +# CHECK-NEXT: 1 10 0.50 * vroundpd $1, (%rax), %ymm2 +# CHECK-NEXT: 1 3 0.50 
vroundps $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vroundps $1, (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 vroundps $1, %ymm0, %ymm2 +# CHECK-NEXT: 1 10 0.50 * vroundps $1, (%rax), %ymm2 +# CHECK-NEXT: 1 3 0.50 vroundsd $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vroundsd $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.50 vroundss $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vroundss $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 1.00 vrsqrtps %xmm0, %xmm2 +# CHECK-NEXT: 1 10 1.00 * vrsqrtps (%rax), %xmm2 +# CHECK-NEXT: 1 3 1.00 vrsqrtps %ymm0, %ymm2 +# CHECK-NEXT: 1 10 1.00 * vrsqrtps (%rax), %ymm2 +# CHECK-NEXT: 1 3 1.00 vrsqrtss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 1.00 * vrsqrtss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vshufpd $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vshufpd $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vshufpd $1, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vshufpd $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vshufps $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vshufps $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vshufps $1, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vshufps $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 21 9.00 vsqrtpd %xmm0, %xmm2 +# CHECK-NEXT: 1 28 9.00 * vsqrtpd (%rax), %xmm2 +# CHECK-NEXT: 1 21 9.00 vsqrtpd %ymm0, %ymm2 +# CHECK-NEXT: 1 28 9.00 * vsqrtpd (%rax), %ymm2 +# CHECK-NEXT: 1 15 5.00 vsqrtps %xmm0, %xmm2 +# CHECK-NEXT: 1 22 5.00 * vsqrtps (%rax), %xmm2 +# CHECK-NEXT: 1 15 5.00 vsqrtps %ymm0, %ymm2 +# CHECK-NEXT: 1 22 5.00 * vsqrtps (%rax), %ymm2 +# CHECK-NEXT: 1 21 9.00 vsqrtsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 28 9.00 * vsqrtsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 15 5.00 vsqrtss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 22 5.00 * vsqrtss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 2 2 15.00 * U vstmxcsr (%rax) +# CHECK-NEXT: 1 3 0.50 vsubpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vsubpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.50 vsubpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 
10 0.50 * vsubpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 0.50 vsubps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vsubps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.50 vsubps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 10 0.50 * vsubps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 0.50 vsubsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vsubsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.50 vsubss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vsubss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 2 1 1.00 vtestpd %xmm0, %xmm1 +# CHECK-NEXT: 2 8 1.00 * vtestpd (%rax), %xmm1 +# CHECK-NEXT: 2 1 1.00 vtestpd %ymm0, %ymm1 +# CHECK-NEXT: 2 8 1.00 * vtestpd (%rax), %ymm1 +# CHECK-NEXT: 2 1 1.00 vtestps %xmm0, %xmm1 +# CHECK-NEXT: 2 8 1.00 * vtestps (%rax), %xmm1 +# CHECK-NEXT: 2 1 1.00 vtestps %ymm0, %ymm1 +# CHECK-NEXT: 2 8 1.00 * vtestps (%rax), %ymm1 +# CHECK-NEXT: 2 4 1.00 vucomisd %xmm0, %xmm1 +# CHECK-NEXT: 2 11 1.00 * vucomisd (%rax), %xmm1 +# CHECK-NEXT: 2 4 1.00 vucomiss %xmm0, %xmm1 +# CHECK-NEXT: 2 11 1.00 * vucomiss (%rax), %xmm1 +# CHECK-NEXT: 1 1 0.50 vunpckhpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vunpckhpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vunpckhpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vunpckhpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vunpckhps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vunpckhps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vunpckhps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vunpckhps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vunpcklpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vunpcklpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vunpcklpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vunpcklpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vunpcklps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vunpcklps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vunpcklps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vunpcklps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.25 vxorpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vxorpd 
(%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vxorpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vxorpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.25 vxorps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vxorps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vxorps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vxorps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 18 10 6.00 U vzeroall +# CHECK-NEXT: 1 0 0.25 U vzeroupper + +# CHECK: Resources: +# CHECK-NEXT: [0] - Zn3AGU0 +# CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: [8] - Zn3FPP0 +# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: [14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# CHECK-NEXT: [15.0] - Zn3Load +# CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: [16.0] - Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: 1.33 1.33 1.33 16.50 16.50 16.50 16.50 - 184.25 373.25 253.75 141.75 208.50 208.50 65.00 119.67 119.67 119.67 107.00 107.00 107.00 19.00 19.00 + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - vaddpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vaddpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - vaddpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 
- - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vaddpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vaddps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - vaddps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vaddps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - vaddsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vaddsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - vaddss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vaddss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - vaddsubpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vaddsubpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - vaddsubpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vaddsubpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - vaddsubps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vaddsubps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - vaddsubps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vaddsubps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vaesdec %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 
0.33 0.33 0.33 - - vaesdec (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vaesdeclast %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vaesdeclast (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vaesenc %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vaesenc (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vaesenclast %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vaesenclast (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vaesimc %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vaesimc (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vaeskeygenassist $22, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vaeskeygenassist $22, (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vandnpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vandnpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vandnpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vandnpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vandnps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vandnps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vandnps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 
0.33 0.33 0.33 - - vandnps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vandpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vandpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vandpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vandpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vandps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vandps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vandps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vandps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vblendpd $11, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vblendpd $11, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vblendpd $11, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vblendpd $11, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vblendps $11, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vblendps $11, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vblendps $11, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vblendps $11, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vblendvpd %xmm3, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 
0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vblendvpd %xmm3, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vblendvpd %ymm3, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vblendvpd %ymm3, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vblendvps %xmm3, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vblendvps %xmm3, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vblendvps %ymm3, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vblendvps %ymm3, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vbroadcastf128 (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vbroadcastsd (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vbroadcastss (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vbroadcastss (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vcmpeqpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcmpeqpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vcmpeqpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcmpeqpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vcmpeqps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcmpeqps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vcmpeqps %ymm0, 
%ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcmpeqps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vcmpeqsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcmpeqsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vcmpeqss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcmpeqss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vcomisd %xmm0, %xmm1 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcomisd (%rax), %xmm1 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vcomiss %xmm0, %xmm1 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcomiss (%rax), %xmm1 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - vcvtdq2pd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcvtdq2pd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - vcvtdq2pd %xmm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcvtdq2pd (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - vcvtdq2ps %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcvtdq2ps (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - vcvtdq2ps %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcvtdq2ps (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - vcvtpd2dq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcvtpd2dqx (%rax), %xmm2 +# 
CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - vcvtpd2dq %ymm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcvtpd2dqy (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - vcvtpd2ps %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcvtpd2psx (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - vcvtpd2ps %ymm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcvtpd2psy (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - vcvtps2dq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcvtps2dq (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - vcvtps2dq %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcvtps2dq (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - vcvtps2pd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcvtps2pd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - vcvtps2pd %xmm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcvtps2pd (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - vcvtsd2si %xmm0, %ecx +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - vcvtsd2si %xmm0, %rcx +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcvtsd2si (%rax), %ecx +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcvtsd2si (%rax), %rcx +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - vcvtsd2ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 
0.33 0.33 0.33 0.33 0.33 - - vcvtsd2ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - vcvtsi2sd %ecx, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - vcvtsi2sd %rcx, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcvtsi2sdl (%rax), %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcvtsi2sdq (%rax), %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - vcvtsi2ss %ecx, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - vcvtsi2ss %rcx, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcvtsi2ssl (%rax), %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcvtsi2ssq (%rax), %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - vcvtss2sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcvtss2sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - vcvtss2si %xmm0, %ecx +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - vcvtss2si %xmm0, %rcx +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcvtss2si (%rax), %ecx +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcvtss2si (%rax), %rcx +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - vcvttpd2dq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcvttpd2dqx (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - vcvttpd2dq %ymm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcvttpd2dqy (%rax), %xmm2 +# CHECK-NEXT: - - - - 
- - - - - - 0.50 0.50 - - - - - - - - - - - vcvttps2dq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcvttps2dq (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - vcvttps2dq %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcvttps2dq (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - vcvttsd2si %xmm0, %ecx +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - vcvttsd2si %xmm0, %rcx +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcvttsd2si (%rax), %ecx +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcvttsd2si (%rax), %rcx +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - vcvttss2si %xmm0, %ecx +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - vcvttss2si %xmm0, %rcx +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcvttss2si (%rax), %ecx +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcvttss2si (%rax), %rcx +# CHECK-NEXT: - - - - - - - - - 5.00 - - - - - - - - - - - - - vdivpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 5.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vdivpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 5.00 - - - - - - - - - - - - - vdivpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 5.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vdivpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 3.00 - - - - - - - - - - - - - vdivps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 3.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vdivps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 3.00 - - - - - - - - - - - - - vdivps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 3.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 
0.33 0.33 - - vdivps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 5.00 - - - - - - - - - - - - - vdivsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 5.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vdivsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 3.00 - - - - - - - - - - - - - vdivss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 3.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vdivss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - - - - - - - - - - vdppd $22, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vdppd $22, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - - - - - - - - - - vdpps $22, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vdpps $22, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - - - - - - - - - - vdpps $22, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vdpps $22, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 - - - - - - - - - - - - - - vextractf128 $1, %ymm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 vextractf128 $1, %ymm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - vextractps $1, %xmm0, %ecx +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 vextractps $1, %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - - - - - - vhaddpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 2.00 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vhaddpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - - - - - - vhaddpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - - 2.00 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vhaddpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - - 2.00 - 
- - - - - - - - - - - vhaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 2.00 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vhaddps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - - - - - - vhaddps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - - 2.00 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vhaddps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - - - - - - vhsubpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 2.00 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vhsubpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - - - - - - vhsubpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - - 2.00 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vhsubpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - - - - - - vhsubps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 2.00 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vhsubps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - - - - - - vhsubps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - - 2.00 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vhsubps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vinsertf128 $1, %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 1.00 - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vinsertf128 $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vinsertps $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vinsertps $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vlddqu (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vlddqu (%rax), %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 1.50 1.50 1.50 1.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vldmxcsr (%rax) +# 
CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 vmaskmovdqu %xmm0, %xmm1 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmaskmovpd (%rax), %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmaskmovpd (%rax), %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - - - - 2.00 2.00 4.00 0.33 0.33 0.33 - - - 0.50 0.50 vmaskmovpd %xmm0, %xmm1, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 3.00 3.00 6.00 0.33 0.33 0.33 - - - 0.50 0.50 vmaskmovpd %ymm0, %ymm1, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmaskmovps (%rax), %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmaskmovps (%rax), %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - - - - 3.00 3.00 6.00 0.33 0.33 0.33 - - - 0.50 0.50 vmaskmovps %xmm0, %xmm1, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 6.00 6.00 12.00 0.33 0.33 0.33 - - - 0.50 0.50 vmaskmovps %ymm0, %ymm1, (%rax) +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vmaxpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmaxpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vmaxpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmaxpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vmaxps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmaxps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vmaxps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmaxps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vmaxsd %xmm0, %xmm1, %xmm2 +# 
CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmaxsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vmaxss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmaxss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vminpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vminpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vminpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vminpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vminps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vminps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vminps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vminps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vminsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vminsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vminss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vminss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vmovapd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 vmovapd %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmovapd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vmovapd 
%ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 vmovapd %ymm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmovapd (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vmovaps %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 vmovaps %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmovaps (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vmovaps %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 vmovaps %ymm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmovaps (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - vmovd %eax, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmovd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - vmovd %xmm0, %ecx +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 vmovd %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vmovddup %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmovddup (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vmovddup %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmovddup (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vmovdqa %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 vmovdqa %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmovdqa (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - 
- - - - - vmovdqa %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 vmovdqa %ymm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmovdqa (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vmovdqu %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 vmovdqu %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmovdqu (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vmovdqu %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 vmovdqu %ymm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmovdqu (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vmovhlps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vmovlhps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 vmovhpd %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmovhpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 vmovhps %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmovhps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 vmovlpd %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmovlpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 vmovlps %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmovlps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 
1.00 - - - - - - - - - - - - vmovmskpd %xmm0, %ecx +# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - - - - - - vmovmskpd %ymm0, %ecx +# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - - - - - - vmovmskps %xmm0, %ecx +# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - - - - - - vmovmskps %ymm0, %ecx +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 vmovntdq %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 vmovntdq %ymm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmovntdqa (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmovntdqa (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 vmovntpd %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 vmovntpd %ymm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 vmovntps %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 vmovntps %ymm0, (%rax) +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vmovq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - vmovq %rax, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmovq (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - vmovq %xmm0, %rcx +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 vmovq %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vmovsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 vmovsd %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmovsd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - 
- - - - - - - - - - vmovshdup %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmovshdup (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vmovshdup %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmovshdup (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vmovsldup %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmovsldup (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vmovsldup %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmovsldup (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vmovss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 vmovss %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmovss (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vmovupd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 vmovupd %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmovupd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vmovupd %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 vmovupd %ymm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmovupd (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vmovups %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 vmovups %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmovups (%rax), 
%xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vmovups %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 vmovups %ymm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmovups (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 2.00 2.00 - - - - - - - - - - - vmpsadbw $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 2.00 2.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmpsadbw $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vmulpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmulpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vmulpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmulpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vmulps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmulps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vmulps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmulps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vmulsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmulsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vmulss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmulss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vorpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 
0.33 0.33 0.33 0.33 0.33 - - vorpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vorpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vorpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vorps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vorps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vorps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vorps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vpabsb %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpabsb (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vpabsd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpabsd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vpabsw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpabsw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpackssdw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpackssdw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpacksswb %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpacksswb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpackusdw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpackusdw (%rax), %xmm1, %xmm2 +# 
CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpackuswb %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpackuswb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpaddb %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpaddb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpaddd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpaddd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpaddq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpaddq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vpaddsb %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpaddsb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vpaddsw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpaddsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vpaddusb %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpaddusb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vpaddusw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpaddusw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpaddw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpaddw (%rax), %xmm1, 
%xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpalignr $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpalignr $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpand %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpand (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpandn %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpandn (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vpavgb %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpavgb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vpavgw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpavgw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - vpblendvb %xmm3, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpblendvb %xmm3, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpblendw $11, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpblendw $11, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - - - - - - - - - - vpclmulqdq $11, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpclmulqdq $11, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpcmpeqb %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 
0.33 0.33 0.33 - - vpcmpeqb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpcmpeqd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpcmpeqd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vpcmpeqq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpcmpeqq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpcmpeqw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpcmpeqw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 3.00 3.00 3.00 3.00 - - - - - - - - - - - vpcmpestri $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 3.00 3.00 3.00 3.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpcmpestri $1, (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 3.00 3.00 3.00 3.00 - - - - - - - - - - - vpcmpestrm $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 3.00 3.00 3.00 3.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpcmpestrm $1, (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpcmpgtb %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpcmpgtb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpcmpgtd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpcmpgtd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpcmpgtq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpcmpgtq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpcmpgtw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - 
- - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpcmpgtw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 2.00 2.00 - - - - - - - - - - - vpcmpistri $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 2.00 2.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpcmpistri $1, (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 2.00 2.00 - - - - - - - - - - - vpcmpistrm $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 2.00 2.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpcmpistrm $1, (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vperm2f128 $1, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 1.00 - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vperm2f128 $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpermilpd $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpermilpd $1, (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpermilpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpermilpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpermilpd $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpermilpd $1, (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpermilpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpermilpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpermilps $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpermilps $1, (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpermilps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 
0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpermilps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpermilps $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpermilps $1, (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpermilps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpermilps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - vpextrb $1, %xmm0, %ecx +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 vpextrb $1, %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - vpextrd $1, %xmm0, %ecx +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 vpextrd $1, %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - vpextrq $1, %xmm0, %rcx +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 vpextrq $1, %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - vpextrw $1, %xmm0, %ecx +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 vpextrw $1, %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - vphaddd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vphaddd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - vphaddsw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vphaddsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - vphaddw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vphaddw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - 
- - - - - - - vphminposuw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vphminposuw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - vphsubd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vphsubd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - vphsubsw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vphsubsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - vphsubw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vphsubw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - vpinsrb $1, %eax, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 1.50 1.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpinsrb $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - vpinsrd $1, %eax, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 1.50 1.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpinsrd $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - vpinsrq $1, %rax, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 1.50 1.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpinsrq $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - vpinsrw $1, %eax, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 1.50 1.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpinsrw $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - vpmaddubsw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmaddubsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - vpmaddwd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 
0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmaddwd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpmaxsb %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmaxsb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpmaxsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmaxsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpmaxsw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmaxsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpmaxub %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmaxub (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpmaxud %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmaxud (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpmaxuw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmaxuw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpminsb %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpminsb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpminsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpminsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpminsw %xmm0, 
%xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpminsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpminub %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpminub (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpminud %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpminud (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpminuw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpminuw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - - - - - - vpmovmskb %xmm0, %ecx +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsxbd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxbd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsxbq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxbq (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsxbw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxbw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsxdq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxdq (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsxwd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxwd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - 
vpmovsxwq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxwq (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovzxbd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxbd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovzxbq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxbq (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovzxbw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxbw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovzxdq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxdq (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovzxwd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxwd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovzxwq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxwq (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - vpmuldq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmuldq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - vpmulhrsw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmulhrsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - vpmulhuw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmulhuw 
(%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - vpmulhw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmulhw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - vpmulld %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmulld (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - vpmullw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmullw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - vpmuludq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmuludq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpor %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpor (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - vpsadbw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsadbw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpshufb %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpshufb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpshufd $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpshufd $1, (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpshufhw $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpshufhw $1, (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 
0.50 0.50 - - - - - - - - - - - - vpshuflw $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpshuflw $1, (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vpsignb %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsignb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vpsignd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsignd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vpsignw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsignw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpslld $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpslld %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpslld (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpslldq $1, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsllq $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsllq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsllq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsllw $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsllw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsllw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsrad $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - 
- - vpsrad %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsrad (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsraw $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsraw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsraw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsrld $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsrld %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsrld (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsrldq $1, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsrlq $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsrlq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsrlq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsrlw $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsrlw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsrlw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpsubb %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsubb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpsubd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsubd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpsubq %xmm0, 
%xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsubq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vpsubsb %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsubsb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vpsubsw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsubsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vpsubusb %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsubusb (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vpsubusw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsubusw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpsubw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsubw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 1.00 - - - - - - - - vptest %xmm0, %xmm1 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 1.00 1.00 1.00 0.33 0.33 0.33 0.33 0.33 0.33 - - vptest (%rax), %xmm1 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 1.00 - - - - - - - - vptest %ymm0, %ymm1 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 1.00 1.00 1.00 0.33 0.33 0.33 0.33 0.33 0.33 - - vptest (%rax), %ymm1 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpunpckhbw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpunpckhbw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpunpckhdq %xmm0, %xmm1, %xmm2 +# 
CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpunpckhdq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpunpckhqdq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpunpckhqdq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpunpckhwd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpunpckhwd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpunpcklbw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpunpcklbw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpunpckldq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpunpckldq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpunpcklqdq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpunpcklqdq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpunpcklwd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpunpcklwd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpxor %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpxor (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vrcpps %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vrcpps (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vrcpps %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 
- - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vrcpps (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vrcpss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vrcpss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - vroundpd $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vroundpd $1, (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - vroundpd $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vroundpd $1, (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - vroundps $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vroundps $1, (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - vroundps $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vroundps $1, (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - vroundsd $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vroundsd $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - vroundss $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vroundss $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vrsqrtps %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vrsqrtps (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vrsqrtps %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vrsqrtps (%rax), %ymm2 +# CHECK-NEXT: - - - - - - 
- - - 1.00 - - - - - - - - - - - - - vrsqrtss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vrsqrtss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vshufpd $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vshufpd $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vshufpd $1, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vshufpd $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vshufps $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vshufps $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vshufps $1, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vshufps $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 9.00 - - - - - - - - - - - - - vsqrtpd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 9.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vsqrtpd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 9.00 - - - - - - - - - - - - - vsqrtpd %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 9.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vsqrtpd (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - - 5.00 - - - - - - - - - - - - - vsqrtps %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 5.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vsqrtps (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 5.00 - - - - - - - - - - - - - vsqrtps %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 5.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vsqrtps (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - - 9.00 - - - - - - - - - - - - - vsqrtsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 9.00 - - 0.50 0.50 
- 0.33 0.33 0.33 0.33 0.33 0.33 - - vsqrtsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 5.00 - - - - - - - - - - - - - vsqrtss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 5.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vsqrtss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 15.00 15.00 15.00 15.00 - - - - - - - - 0.33 0.33 0.33 - - - 0.50 0.50 vstmxcsr (%rax) +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - vsubpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vsubpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - vsubpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vsubpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - vsubps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vsubps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - vsubps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vsubps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - vsubsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vsubsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - vsubss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vsubss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vtestpd %xmm0, %xmm1 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vtestpd (%rax), %xmm1 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vtestpd %ymm0, %ymm1 +# CHECK-NEXT: - - - - - - - - - 1.00 
1.00 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vtestpd (%rax), %ymm1 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vtestps %xmm0, %xmm1 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vtestps (%rax), %xmm1 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vtestps %ymm0, %ymm1 +# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vtestps (%rax), %ymm1 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vucomisd %xmm0, %xmm1 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vucomisd (%rax), %xmm1 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vucomiss %xmm0, %xmm1 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vucomiss (%rax), %xmm1 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vunpckhpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vunpckhpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vunpckhpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vunpckhpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vunpckhps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vunpckhps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vunpckhps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vunpckhps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vunpcklpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vunpcklpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - 
- - - - - - 0.50 0.50 - - - - - - - - - - - - vunpcklpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vunpcklpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vunpcklps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vunpcklps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vunpcklps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vunpcklps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vxorpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vxorpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vxorpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vxorpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vxorps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vxorps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vxorps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vxorps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 6.00 6.00 6.00 6.00 - - - - - - - - - - - vzeroall +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vzeroupper diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx2.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx2.s @@ -0,0 +1,1096 @@ +# NOTE: Assertions have been autogenerated by 
utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -instruction-tables < %s | FileCheck %s + +vbroadcasti128 (%rax), %ymm0 + +vbroadcastsd %xmm0, %ymm0 +vbroadcastss %xmm0, %ymm0 + +vextracti128 $1, %ymm0, %xmm2 +vextracti128 $1, %ymm0, (%rax) + +vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2 +vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2 + +vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2 +vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2 + +vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2 +vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2 + +vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2 +vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2 + +vinserti128 $1, %xmm0, %ymm1, %ymm2 +vinserti128 $1, (%rax), %ymm1, %ymm2 + +vmovntdqa (%rax), %ymm0 + +vmpsadbw $1, %ymm0, %ymm1, %ymm2 +vmpsadbw $1, (%rax), %ymm1, %ymm2 + +vpabsb %ymm0, %ymm2 +vpabsb (%rax), %ymm2 + +vpabsd %ymm0, %ymm2 +vpabsd (%rax), %ymm2 + +vpabsw %ymm0, %ymm2 +vpabsw (%rax), %ymm2 + +vpackssdw %ymm0, %ymm1, %ymm2 +vpackssdw (%rax), %ymm1, %ymm2 + +vpacksswb %ymm0, %ymm1, %ymm2 +vpacksswb (%rax), %ymm1, %ymm2 + +vpackusdw %ymm0, %ymm1, %ymm2 +vpackusdw (%rax), %ymm1, %ymm2 + +vpackuswb %ymm0, %ymm1, %ymm2 +vpackuswb (%rax), %ymm1, %ymm2 + +vpaddb %ymm0, %ymm1, %ymm2 +vpaddb (%rax), %ymm1, %ymm2 + +vpaddd %ymm0, %ymm1, %ymm2 +vpaddd (%rax), %ymm1, %ymm2 + +vpaddq %ymm0, %ymm1, %ymm2 +vpaddq (%rax), %ymm1, %ymm2 + +vpaddsb %ymm0, %ymm1, %ymm2 +vpaddsb (%rax), %ymm1, %ymm2 + +vpaddsw %ymm0, %ymm1, %ymm2 +vpaddsw (%rax), %ymm1, %ymm2 + +vpaddusb %ymm0, %ymm1, %ymm2 +vpaddusb (%rax), %ymm1, %ymm2 + +vpaddusw %ymm0, %ymm1, %ymm2 +vpaddusw (%rax), %ymm1, %ymm2 + +vpaddw %ymm0, %ymm1, %ymm2 +vpaddw (%rax), %ymm1, %ymm2 + +vpalignr $1, %ymm0, %ymm1, %ymm2 +vpalignr $1, (%rax), %ymm1, %ymm2 + +vpand %ymm0, %ymm1, %ymm2 +vpand (%rax), %ymm1, %ymm2 + +vpandn %ymm0, %ymm1, %ymm2 +vpandn (%rax), %ymm1, %ymm2 + +vpavgb %ymm0, %ymm1, %ymm2 +vpavgb (%rax), %ymm1, %ymm2 + +vpavgw %ymm0, %ymm1, %ymm2 +vpavgw (%rax), %ymm1, %ymm2 + +vpblendd $11, 
%xmm0, %xmm1, %xmm2 +vpblendd $11, (%rax), %xmm1, %xmm2 + +vpblendd $11, %ymm0, %ymm1, %ymm2 +vpblendd $11, (%rax), %ymm1, %ymm2 + +vpblendvb %ymm3, %ymm0, %ymm1, %ymm2 +vpblendvb %ymm3, (%rax), %ymm1, %ymm2 + +vpblendw $11, %ymm0, %ymm1, %ymm2 +vpblendw $11, (%rax), %ymm1, %ymm2 + +vpbroadcastb %xmm0, %xmm0 +vpbroadcastb (%rax), %xmm0 + +vpbroadcastb %xmm0, %ymm0 +vpbroadcastb (%rax), %ymm0 + +vpbroadcastd %xmm0, %xmm0 +vpbroadcastd (%rax), %xmm0 + +vpbroadcastd %xmm0, %ymm0 +vpbroadcastd (%rax), %ymm0 + +vpbroadcastq %xmm0, %xmm0 +vpbroadcastq (%rax), %xmm0 + +vpbroadcastq %xmm0, %ymm0 +vpbroadcastq (%rax), %ymm0 + +vpbroadcastw %xmm0, %xmm0 +vpbroadcastw (%rax), %xmm0 + +vpbroadcastw %xmm0, %ymm0 +vpbroadcastw (%rax), %ymm0 + +vpcmpeqb %ymm0, %ymm1, %ymm2 +vpcmpeqb (%rax), %ymm1, %ymm2 + +vpcmpeqd %ymm0, %ymm1, %ymm2 +vpcmpeqd (%rax), %ymm1, %ymm2 + +vpcmpeqq %ymm0, %ymm1, %ymm2 +vpcmpeqq (%rax), %ymm1, %ymm2 + +vpcmpeqw %ymm0, %ymm1, %ymm2 +vpcmpeqw (%rax), %ymm1, %ymm2 + +vpcmpgtb %ymm0, %ymm1, %ymm2 +vpcmpgtb (%rax), %ymm1, %ymm2 + +vpcmpgtd %ymm0, %ymm1, %ymm2 +vpcmpgtd (%rax), %ymm1, %ymm2 + +vpcmpgtq %ymm0, %ymm1, %ymm2 +vpcmpgtq (%rax), %ymm1, %ymm2 + +vpcmpgtw %ymm0, %ymm1, %ymm2 +vpcmpgtw (%rax), %ymm1, %ymm2 + +vperm2i128 $1, %ymm0, %ymm1, %ymm2 +vperm2i128 $1, (%rax), %ymm1, %ymm2 + +vpermd %ymm0, %ymm1, %ymm2 +vpermd (%rax), %ymm1, %ymm2 + +vpermpd $1, %ymm0, %ymm2 +vpermpd $1, (%rax), %ymm2 + +vpermps %ymm0, %ymm1, %ymm2 +vpermps (%rax), %ymm1, %ymm2 + +vpermq $1, %ymm0, %ymm2 +vpermq $1, (%rax), %ymm2 + +vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2 +vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2 + +vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2 +vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2 + +vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2 +vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2 + +vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2 +vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2 + +vphaddd %ymm0, %ymm1, %ymm2 +vphaddd (%rax), %ymm1, %ymm2 + +vphaddsw %ymm0, %ymm1, %ymm2 +vphaddsw (%rax), %ymm1, 
%ymm2 + +vphaddw %ymm0, %ymm1, %ymm2 +vphaddw (%rax), %ymm1, %ymm2 + +vphsubd %ymm0, %ymm1, %ymm2 +vphsubd (%rax), %ymm1, %ymm2 + +vphsubsw %ymm0, %ymm1, %ymm2 +vphsubsw (%rax), %ymm1, %ymm2 + +vphsubw %ymm0, %ymm1, %ymm2 +vphsubw (%rax), %ymm1, %ymm2 + +vpmaddubsw %ymm0, %ymm1, %ymm2 +vpmaddubsw (%rax), %ymm1, %ymm2 + +vpmaddwd %ymm0, %ymm1, %ymm2 +vpmaddwd (%rax), %ymm1, %ymm2 + +vpmaskmovd (%rax), %xmm0, %xmm2 +vpmaskmovd (%rax), %ymm0, %ymm2 + +vpmaskmovd %xmm0, %xmm1, (%rax) +vpmaskmovd %ymm0, %ymm1, (%rax) + +vpmaskmovq (%rax), %xmm0, %xmm2 +vpmaskmovq (%rax), %ymm0, %ymm2 + +vpmaskmovq %xmm0, %xmm1, (%rax) +vpmaskmovq %ymm0, %ymm1, (%rax) + +vpmaxsb %ymm0, %ymm1, %ymm2 +vpmaxsb (%rax), %ymm1, %ymm2 + +vpmaxsd %ymm0, %ymm1, %ymm2 +vpmaxsd (%rax), %ymm1, %ymm2 + +vpmaxsw %ymm0, %ymm1, %ymm2 +vpmaxsw (%rax), %ymm1, %ymm2 + +vpmaxub %ymm0, %ymm1, %ymm2 +vpmaxub (%rax), %ymm1, %ymm2 + +vpmaxud %ymm0, %ymm1, %ymm2 +vpmaxud (%rax), %ymm1, %ymm2 + +vpmaxuw %ymm0, %ymm1, %ymm2 +vpmaxuw (%rax), %ymm1, %ymm2 + +vpminsb %ymm0, %ymm1, %ymm2 +vpminsb (%rax), %ymm1, %ymm2 + +vpminsd %ymm0, %ymm1, %ymm2 +vpminsd (%rax), %ymm1, %ymm2 + +vpminsw %ymm0, %ymm1, %ymm2 +vpminsw (%rax), %ymm1, %ymm2 + +vpminub %ymm0, %ymm1, %ymm2 +vpminub (%rax), %ymm1, %ymm2 + +vpminud %ymm0, %ymm1, %ymm2 +vpminud (%rax), %ymm1, %ymm2 + +vpminuw %ymm0, %ymm1, %ymm2 +vpminuw (%rax), %ymm1, %ymm2 + +vpmovmskb %ymm0, %rcx + +vpmovsxbd %xmm0, %ymm2 +vpmovsxbd (%rax), %ymm2 + +vpmovsxbq %xmm0, %ymm2 +vpmovsxbq (%rax), %ymm2 + +vpmovsxbw %xmm0, %ymm2 +vpmovsxbw (%rax), %ymm2 + +vpmovsxdq %xmm0, %ymm2 +vpmovsxdq (%rax), %ymm2 + +vpmovsxwd %xmm0, %ymm2 +vpmovsxwd (%rax), %ymm2 + +vpmovsxwq %xmm0, %ymm2 +vpmovsxwq (%rax), %ymm2 + +vpmovzxbd %xmm0, %ymm2 +vpmovzxbd (%rax), %ymm2 + +vpmovzxbq %xmm0, %ymm2 +vpmovzxbq (%rax), %ymm2 + +vpmovzxbw %xmm0, %ymm2 +vpmovzxbw (%rax), %ymm2 + +vpmovzxdq %xmm0, %ymm2 +vpmovzxdq (%rax), %ymm2 + +vpmovzxwd %xmm0, %ymm2 +vpmovzxwd (%rax), %ymm2 + +vpmovzxwq %xmm0, %ymm2 
+vpmovzxwq (%rax), %ymm2 + +vpmuldq %ymm0, %ymm1, %ymm2 +vpmuldq (%rax), %ymm1, %ymm2 + +vpmulhrsw %ymm0, %ymm1, %ymm2 +vpmulhrsw (%rax), %ymm1, %ymm2 + +vpmulhuw %ymm0, %ymm1, %ymm2 +vpmulhuw (%rax), %ymm1, %ymm2 + +vpmulhw %ymm0, %ymm1, %ymm2 +vpmulhw (%rax), %ymm1, %ymm2 + +vpmulld %ymm0, %ymm1, %ymm2 +vpmulld (%rax), %ymm1, %ymm2 + +vpmullw %ymm0, %ymm1, %ymm2 +vpmullw (%rax), %ymm1, %ymm2 + +vpmuludq %ymm0, %ymm1, %ymm2 +vpmuludq (%rax), %ymm1, %ymm2 + +vpor %ymm0, %ymm1, %ymm2 +vpor (%rax), %ymm1, %ymm2 + +vpsadbw %ymm0, %ymm1, %ymm2 +vpsadbw (%rax), %ymm1, %ymm2 + +vpshufb %ymm0, %ymm1, %ymm2 +vpshufb (%rax), %ymm1, %ymm2 + +vpshufd $1, %ymm0, %ymm2 +vpshufd $1, (%rax), %ymm2 + +vpshufhw $1, %ymm0, %ymm2 +vpshufhw $1, (%rax), %ymm2 + +vpshuflw $1, %ymm0, %ymm2 +vpshuflw $1, (%rax), %ymm2 + +vpsignb %ymm0, %ymm1, %ymm2 +vpsignb (%rax), %ymm1, %ymm2 + +vpsignd %ymm0, %ymm1, %ymm2 +vpsignd (%rax), %ymm1, %ymm2 + +vpsignw %ymm0, %ymm1, %ymm2 +vpsignw (%rax), %ymm1, %ymm2 + +vpslld $1, %ymm0, %ymm2 +vpslld %xmm0, %ymm1, %ymm2 +vpslld (%rax), %ymm1, %ymm2 + +vpslldq $1, %ymm1, %ymm2 + +vpsllq $1, %ymm0, %ymm2 +vpsllq %xmm0, %ymm1, %ymm2 +vpsllq (%rax), %ymm1, %ymm2 + +vpsllvd %xmm0, %xmm1, %xmm2 +vpsllvd (%rax), %xmm1, %xmm2 + +vpsllvd %ymm0, %ymm1, %ymm2 +vpsllvd (%rax), %ymm1, %ymm2 + +vpsllvq %xmm0, %xmm1, %xmm2 +vpsllvq (%rax), %xmm1, %xmm2 + +vpsllvq %ymm0, %ymm1, %ymm2 +vpsllvq (%rax), %ymm1, %ymm2 + +vpsllw $1, %ymm0, %ymm2 +vpsllw %xmm0, %ymm1, %ymm2 +vpsllw (%rax), %ymm1, %ymm2 + +vpsrad $1, %ymm0, %ymm2 +vpsrad %xmm0, %ymm1, %ymm2 +vpsrad (%rax), %ymm1, %ymm2 + +vpsravd %xmm0, %xmm1, %xmm2 +vpsravd (%rax), %xmm1, %xmm2 + +vpsravd %ymm0, %ymm1, %ymm2 +vpsravd (%rax), %ymm1, %ymm2 + +vpsraw $1, %ymm0, %ymm2 +vpsraw %xmm0, %ymm1, %ymm2 +vpsraw (%rax), %ymm1, %ymm2 + +vpsrld $1, %ymm0, %ymm2 +vpsrld %xmm0, %ymm1, %ymm2 +vpsrld (%rax), %ymm1, %ymm2 + +vpsrldq $1, %ymm1, %ymm2 + +vpsrlq $1, %ymm0, %ymm2 +vpsrlq %xmm0, %ymm1, %ymm2 +vpsrlq (%rax), %ymm1, %ymm2 
+ +vpsrlvd %xmm0, %xmm1, %xmm2 +vpsrlvd (%rax), %xmm1, %xmm2 + +vpsrlvd %ymm0, %ymm1, %ymm2 +vpsrlvd (%rax), %ymm1, %ymm2 + +vpsrlvq %xmm0, %xmm1, %xmm2 +vpsrlvq (%rax), %xmm1, %xmm2 + +vpsrlvq %ymm0, %ymm1, %ymm2 +vpsrlvq (%rax), %ymm1, %ymm2 + +vpsrlw $1, %ymm0, %ymm2 +vpsrlw %xmm0, %ymm1, %ymm2 +vpsrlw (%rax), %ymm1, %ymm2 + +vpsubb %ymm0, %ymm1, %ymm2 +vpsubb (%rax), %ymm1, %ymm2 + +vpsubd %ymm0, %ymm1, %ymm2 +vpsubd (%rax), %ymm1, %ymm2 + +vpsubq %ymm0, %ymm1, %ymm2 +vpsubq (%rax), %ymm1, %ymm2 + +vpsubsb %ymm0, %ymm1, %ymm2 +vpsubsb (%rax), %ymm1, %ymm2 + +vpsubsw %ymm0, %ymm1, %ymm2 +vpsubsw (%rax), %ymm1, %ymm2 + +vpsubusb %ymm0, %ymm1, %ymm2 +vpsubusb (%rax), %ymm1, %ymm2 + +vpsubusw %ymm0, %ymm1, %ymm2 +vpsubusw (%rax), %ymm1, %ymm2 + +vpsubw %ymm0, %ymm1, %ymm2 +vpsubw (%rax), %ymm1, %ymm2 + +vpunpckhbw %ymm0, %ymm1, %ymm2 +vpunpckhbw (%rax), %ymm1, %ymm2 + +vpunpckhdq %ymm0, %ymm1, %ymm2 +vpunpckhdq (%rax), %ymm1, %ymm2 + +vpunpckhqdq %ymm0, %ymm1, %ymm2 +vpunpckhqdq (%rax), %ymm1, %ymm2 + +vpunpckhwd %ymm0, %ymm1, %ymm2 +vpunpckhwd (%rax), %ymm1, %ymm2 + +vpunpcklbw %ymm0, %ymm1, %ymm2 +vpunpcklbw (%rax), %ymm1, %ymm2 + +vpunpckldq %ymm0, %ymm1, %ymm2 +vpunpckldq (%rax), %ymm1, %ymm2 + +vpunpcklqdq %ymm0, %ymm1, %ymm2 +vpunpcklqdq (%rax), %ymm1, %ymm2 + +vpunpcklwd %ymm0, %ymm1, %ymm2 +vpunpcklwd (%rax), %ymm1, %ymm2 + +vpxor %ymm0, %ymm1, %ymm2 +vpxor (%rax), %ymm1, %ymm2 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 8 0.50 * vbroadcasti128 (%rax), %ymm0 +# CHECK-NEXT: 1 2 1.00 vbroadcastsd %xmm0, %ymm0 +# CHECK-NEXT: 1 2 1.00 vbroadcastss %xmm0, %ymm0 +# CHECK-NEXT: 1 4 1.00 vextracti128 $1, %ymm0, %xmm2 +# CHECK-NEXT: 2 8 1.00 * vextracti128 $1, %ymm0, (%rax) +# CHECK-NEXT: 1 5 0.33 * vgatherdpd %xmm0, (%rax,%xmm1,2), 
%xmm2 +# CHECK-NEXT: 1 5 0.33 * vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2 +# CHECK-NEXT: 1 5 0.33 * vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1 5 0.33 * vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: 1 5 0.33 * vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1 5 0.33 * vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: 1 5 0.33 * vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1 5 0.33 * vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2 +# CHECK-NEXT: 1 2 1.00 vinserti128 $1, %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 9 1.00 * vinserti128 $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vmovntdqa (%rax), %ymm0 +# CHECK-NEXT: 3 4 2.00 vmpsadbw $1, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 4 11 2.00 * vmpsadbw $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpabsb %ymm0, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpabsb (%rax), %ymm2 +# CHECK-NEXT: 1 1 0.50 vpabsd %ymm0, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpabsd (%rax), %ymm2 +# CHECK-NEXT: 1 1 0.50 vpabsw %ymm0, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpabsw (%rax), %ymm2 +# CHECK-NEXT: 1 1 0.50 vpackssdw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpackssdw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpacksswb %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpacksswb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpackusdw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpackusdw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpackuswb %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpackuswb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.25 vpaddb %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpaddb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.25 vpaddd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpaddd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.25 vpaddq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpaddq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpaddsb %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpaddsb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpaddsw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpaddsw (%rax), %ymm1, 
%ymm2 +# CHECK-NEXT: 1 1 0.50 vpaddusb %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpaddusb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpaddusw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpaddusw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.25 vpaddw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpaddw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpalignr $1, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpalignr $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.25 vpand %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpand (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.25 vpandn %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpandn (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpavgb %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpavgb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpavgw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpavgw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.25 vpblendd $11, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpblendd $11, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.25 vpblendd $11, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpblendd $11, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpblendvb %ymm3, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpblendvb %ymm3, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.25 vpblendw $11, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpblendw $11, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpbroadcastb %xmm0, %xmm0 +# CHECK-NEXT: 1 8 0.50 * vpbroadcastb (%rax), %xmm0 +# CHECK-NEXT: 1 2 1.00 vpbroadcastb %xmm0, %ymm0 +# CHECK-NEXT: 1 8 0.50 * vpbroadcastb (%rax), %ymm0 +# CHECK-NEXT: 1 1 0.50 vpbroadcastd %xmm0, %xmm0 +# CHECK-NEXT: 1 8 0.50 * vpbroadcastd (%rax), %xmm0 +# CHECK-NEXT: 1 2 1.00 vpbroadcastd %xmm0, %ymm0 +# CHECK-NEXT: 1 8 0.50 * vpbroadcastd (%rax), %ymm0 +# CHECK-NEXT: 1 1 0.50 vpbroadcastq %xmm0, %xmm0 +# CHECK-NEXT: 1 8 0.50 * vpbroadcastq (%rax), %xmm0 +# CHECK-NEXT: 1 2 1.00 vpbroadcastq %xmm0, %ymm0 +# CHECK-NEXT: 1 8 0.50 * vpbroadcastq (%rax), %ymm0 +# CHECK-NEXT: 1 1 0.50 
vpbroadcastw %xmm0, %xmm0 +# CHECK-NEXT: 1 8 0.50 * vpbroadcastw (%rax), %xmm0 +# CHECK-NEXT: 1 2 1.00 vpbroadcastw %xmm0, %ymm0 +# CHECK-NEXT: 1 8 0.50 * vpbroadcastw (%rax), %ymm0 +# CHECK-NEXT: 1 1 0.25 vpcmpeqb %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpcmpeqb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.25 vpcmpeqd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpcmpeqd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpcmpeqq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpcmpeqq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.25 vpcmpeqw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpcmpeqw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.25 vpcmpgtb %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpcmpgtb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.25 vpcmpgtd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpcmpgtd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.25 vpcmpgtq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpcmpgtq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.25 vpcmpgtw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpcmpgtw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 1.00 vperm2i128 $1, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 9 1.00 * vperm2i128 $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 2 5 1.00 vpermd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 9 2.00 * vpermd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 2 6 1.00 vpermpd $1, %ymm0, %ymm2 +# CHECK-NEXT: 3 10 2.00 * vpermpd $1, (%rax), %ymm2 +# CHECK-NEXT: 2 7 1.00 vpermps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 3 11 2.00 * vpermps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 2 6 1.00 vpermq $1, %ymm0, %ymm2 +# CHECK-NEXT: 2 9 2.00 * vpermq $1, (%rax), %ymm2 +# CHECK-NEXT: 1 5 0.33 * vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1 5 0.33 * vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: 1 5 0.33 * vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1 5 0.33 * vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2 +# CHECK-NEXT: 1 5 0.33 * vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1 5 0.33 * vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2 +# CHECK-NEXT: 
1 5 0.33 * vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 1 5 0.33 * vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: 3 2 2.00 vphaddd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 4 9 2.00 * vphaddd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 3 2 2.00 vphaddsw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 4 9 2.00 * vphaddsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 3 2 2.00 vphaddw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 4 9 2.00 * vphaddw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 3 2 2.00 vphsubd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 4 9 2.00 * vphsubd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 3 2 2.00 vphsubsw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 4 9 2.00 * vphsubsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 3 2 2.00 vphsubw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 4 9 2.00 * vphsubw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 0.50 vpmaddubsw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 10 0.50 * vpmaddubsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 0.50 vpmaddwd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 10 0.50 * vpmaddwd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpmaskmovd (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpmaskmovd (%rax), %ymm0, %ymm2 +# CHECK-NEXT: 18 1 6.00 * * vpmaskmovd %xmm0, %xmm1, (%rax) +# CHECK-NEXT: 42 1 12.00 * * vpmaskmovd %ymm0, %ymm1, (%rax) +# CHECK-NEXT: 1 8 0.50 * vpmaskmovq (%rax), %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpmaskmovq (%rax), %ymm0, %ymm2 +# CHECK-NEXT: 10 1 4.00 * * vpmaskmovq %xmm0, %xmm1, (%rax) +# CHECK-NEXT: 18 1 6.00 * * vpmaskmovq %ymm0, %ymm1, (%rax) +# CHECK-NEXT: 1 1 0.25 vpmaxsb %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpmaxsb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.25 vpmaxsd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpmaxsd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.25 vpmaxsw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpmaxsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.25 vpmaxub %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpmaxub (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.25 vpmaxud %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpmaxud (%rax), %ymm1, %ymm2 
+# CHECK-NEXT: 1 1 0.25 vpmaxuw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpmaxuw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.25 vpminsb %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpminsb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.25 vpminsd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpminsd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.25 vpminsw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpminsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.25 vpminub %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpminub (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.25 vpminud %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpminud (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.25 vpminuw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpminuw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 1.00 vpmovmskb %ymm0, %ecx +# CHECK-NEXT: 2 4 1.50 vpmovsxbd %xmm0, %ymm2 +# CHECK-NEXT: 1 11 1.50 * vpmovsxbd (%rax), %ymm2 +# CHECK-NEXT: 2 4 1.50 vpmovsxbq %xmm0, %ymm2 +# CHECK-NEXT: 1 11 1.50 * vpmovsxbq (%rax), %ymm2 +# CHECK-NEXT: 2 4 1.50 vpmovsxbw %xmm0, %ymm2 +# CHECK-NEXT: 1 11 1.50 * vpmovsxbw (%rax), %ymm2 +# CHECK-NEXT: 2 4 1.50 vpmovsxdq %xmm0, %ymm2 +# CHECK-NEXT: 1 11 1.50 * vpmovsxdq (%rax), %ymm2 +# CHECK-NEXT: 2 4 1.50 vpmovsxwd %xmm0, %ymm2 +# CHECK-NEXT: 1 11 1.50 * vpmovsxwd (%rax), %ymm2 +# CHECK-NEXT: 2 4 1.50 vpmovsxwq %xmm0, %ymm2 +# CHECK-NEXT: 1 11 1.50 * vpmovsxwq (%rax), %ymm2 +# CHECK-NEXT: 2 4 1.50 vpmovzxbd %xmm0, %ymm2 +# CHECK-NEXT: 1 11 1.50 * vpmovzxbd (%rax), %ymm2 +# CHECK-NEXT: 2 4 1.50 vpmovzxbq %xmm0, %ymm2 +# CHECK-NEXT: 1 11 1.50 * vpmovzxbq (%rax), %ymm2 +# CHECK-NEXT: 2 4 1.50 vpmovzxbw %xmm0, %ymm2 +# CHECK-NEXT: 1 11 1.50 * vpmovzxbw (%rax), %ymm2 +# CHECK-NEXT: 2 4 1.50 vpmovzxdq %xmm0, %ymm2 +# CHECK-NEXT: 1 11 1.50 * vpmovzxdq (%rax), %ymm2 +# CHECK-NEXT: 2 4 1.50 vpmovzxwd %xmm0, %ymm2 +# CHECK-NEXT: 1 11 1.50 * vpmovzxwd (%rax), %ymm2 +# CHECK-NEXT: 2 4 1.50 vpmovzxwq %xmm0, %ymm2 +# CHECK-NEXT: 1 11 1.50 * vpmovzxwq (%rax), %ymm2 +# CHECK-NEXT: 1 3 0.50 
vpmuldq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 10 0.50 * vpmuldq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 0.50 vpmulhrsw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 10 0.50 * vpmulhrsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 0.50 vpmulhuw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 10 0.50 * vpmulhuw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 0.50 vpmulhw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 10 0.50 * vpmulhw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 0.50 vpmulld %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 10 0.50 * vpmulld (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 0.50 vpmullw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 10 0.50 * vpmullw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 0.50 vpmuludq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 10 0.50 * vpmuludq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.25 vpor %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpor (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 0.50 vpsadbw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 10 0.50 * vpsadbw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpshufb %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpshufb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpshufd $1, %ymm0, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpshufd $1, (%rax), %ymm2 +# CHECK-NEXT: 1 1 0.50 vpshufhw $1, %ymm0, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpshufhw $1, (%rax), %ymm2 +# CHECK-NEXT: 1 1 0.50 vpshuflw $1, %ymm0, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpshuflw $1, (%rax), %ymm2 +# CHECK-NEXT: 1 1 0.50 vpsignb %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpsignb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpsignd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpsignd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpsignw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpsignw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpslld $1, %ymm0, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpslld %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpslld (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpslldq $1, %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpsllq $1, %ymm0, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpsllq %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: 
1 8 0.50 * vpsllq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpsllvd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpsllvd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpsllvd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpsllvd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpsllvq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpsllvq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpsllvq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpsllvq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpsllw $1, %ymm0, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpsllw %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpsllw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpsrad $1, %ymm0, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpsrad %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpsrad (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpsravd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpsravd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpsravd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpsravd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpsraw $1, %ymm0, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpsraw %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpsraw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpsrld $1, %ymm0, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpsrld %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpsrld (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpsrldq $1, %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpsrlq $1, %ymm0, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpsrlq %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpsrlq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpsrlvd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpsrlvd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpsrlvd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpsrlvd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpsrlvq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 0.50 * vpsrlvq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 0.50 vpsrlvq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpsrlvq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpsrlw $1, %ymm0, %ymm2 +# CHECK-NEXT: 1 1 0.50 
vpsrlw %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpsrlw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.25 vpsubb %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpsubb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.25 vpsubd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpsubd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.25 vpsubq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpsubq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpsubsb %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpsubsb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpsubsw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpsubsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpsubusb %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpsubusb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpsubusw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpsubusw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.25 vpsubw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpsubw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpunpckhbw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpunpckhbw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpunpckhdq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpunpckhdq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpunpckhqdq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpunpckhqdq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpunpckhwd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpunpckhwd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpunpcklbw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpunpcklbw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpunpckldq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpunpckldq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpunpcklqdq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpunpcklqdq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.50 vpunpcklwd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpunpcklwd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 0.25 vpxor %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 0.50 * vpxor (%rax), %ymm1, %ymm2 + +# CHECK: Resources: +# CHECK-NEXT: [0] - Zn3AGU0 +# 
CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: [8] - Zn3FPP0 +# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: [14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# CHECK-NEXT: [15.0] - Zn3Load +# CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: [16.0] - Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: 6.67 6.67 6.67 - - - - - 70.75 139.25 109.75 36.25 80.50 80.50 29.00 52.33 52.33 52.33 50.67 50.67 50.67 2.50 2.50 + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vbroadcasti128 (%rax), %ymm0 +# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vbroadcastsd %xmm0, %ymm0 +# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vbroadcastss %xmm0, %ymm0 +# CHECK-NEXT: - - - - - - - - 1.00 - - - - - - - - - - - - - - vextracti128 $1, %ymm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 vextracti128 $1, %ymm0, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 
- - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vinserti128 $1, %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vinserti128 $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmovntdqa (%rax), %ymm0 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 2.00 2.00 - - - - - - - - - - - vmpsadbw $1, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 2.00 2.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vmpsadbw $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vpabsb %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpabsb (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vpabsd %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpabsd (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vpabsw %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpabsw (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpackssdw %ymm0, %ymm1, 
%ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpackssdw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpacksswb %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpacksswb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpackusdw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpackusdw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpackuswb %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpackuswb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpaddb %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpaddb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpaddd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpaddd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpaddq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpaddq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vpaddsb %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpaddsb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vpaddsw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpaddsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vpaddusb %ymm0, %ymm1, %ymm2 
+# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpaddusb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vpaddusw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpaddusw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpaddw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpaddw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpalignr $1, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpalignr $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpand %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpand (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpandn %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpandn (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vpavgb %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpavgb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vpavgw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpavgw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpblendd $11, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpblendd $11, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - 
vpblendd $11, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpblendd $11, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - vpblendvb %ymm3, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpblendvb %ymm3, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpblendw $11, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpblendw $11, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpbroadcastb %xmm0, %xmm0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpbroadcastb (%rax), %xmm0 +# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vpbroadcastb %xmm0, %ymm0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpbroadcastb (%rax), %ymm0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpbroadcastd %xmm0, %xmm0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpbroadcastd (%rax), %xmm0 +# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vpbroadcastd %xmm0, %ymm0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpbroadcastd (%rax), %ymm0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpbroadcastq %xmm0, %xmm0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpbroadcastq (%rax), %xmm0 +# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vpbroadcastq %xmm0, %ymm0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpbroadcastq (%rax), %ymm0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpbroadcastw %xmm0, %xmm0 +# 
CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpbroadcastw (%rax), %xmm0 +# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vpbroadcastw %xmm0, %ymm0 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpbroadcastw (%rax), %ymm0 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpcmpeqb %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpcmpeqb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpcmpeqd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpcmpeqd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vpcmpeqq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpcmpeqq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpcmpeqw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpcmpeqw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpcmpgtb %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpcmpgtb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpcmpgtd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpcmpgtd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpcmpgtq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpcmpgtq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - 
vpcmpgtw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpcmpgtw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vperm2i128 $1, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vperm2i128 $1, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vpermd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 2.00 - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpermd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vpermpd $1, %ymm0, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 2.00 - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpermpd $1, (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vpermps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 2.00 - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpermps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vpermq $1, %ymm0, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 2.00 - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpermq $1, (%rax), %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2 +# CHECK-NEXT: 0.33 
0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - vphaddd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vphaddd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - vphaddsw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vphaddsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - vphaddw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vphaddw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - vphsubd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vphsubd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - vphsubsw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vphsubsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - vphsubw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vphsubw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - vpmaddubsw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmaddubsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - vpmaddwd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmaddwd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 
0.33 0.33 0.33 - - vpmaskmovd (%rax), %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmaskmovd (%rax), %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - - - - 3.00 3.00 6.00 0.33 0.33 0.33 - - - 0.50 0.50 vpmaskmovd %xmm0, %xmm1, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 6.00 6.00 12.00 0.33 0.33 0.33 - - - 0.50 0.50 vpmaskmovd %ymm0, %ymm1, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmaskmovq (%rax), %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmaskmovq (%rax), %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - - - - 2.00 2.00 4.00 0.33 0.33 0.33 - - - 0.50 0.50 vpmaskmovq %xmm0, %xmm1, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 3.00 3.00 6.00 0.33 0.33 0.33 - - - 0.50 0.50 vpmaskmovq %ymm0, %ymm1, (%rax) +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpmaxsb %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmaxsb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpmaxsd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmaxsd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpmaxsw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmaxsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpmaxub %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmaxub (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpmaxud %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmaxud (%rax), %ymm1, 
%ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpmaxuw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmaxuw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpminsb %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpminsb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpminsd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpminsd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpminsw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpminsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpminub %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpminub (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpminud %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpminud (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpminuw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpminuw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - - - - - - vpmovmskb %ymm0, %ecx +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovsxbd %xmm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxbd (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovsxbq %xmm0, %ymm2 +# CHECK-NEXT: - - - - 
- - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxbq (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovsxbw %xmm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxbw (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovsxdq %xmm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxdq (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovsxwd %xmm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxwd (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovsxwq %xmm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxwq (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovzxbd %xmm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxbd (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovzxbq %xmm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxbq (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovzxbw %xmm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxbw (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovzxdq %xmm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxdq (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - - - vpmovzxwd %xmm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxwd (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - - - - - - - - - - 
- - vpmovzxwq %xmm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 1.50 1.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxwq (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - vpmuldq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmuldq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - vpmulhrsw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmulhrsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - vpmulhuw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmulhuw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - vpmulhw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmulhw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - vpmulld %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmulld (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - vpmullw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmullw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - vpmuludq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmuludq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpor %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpor (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - vpsadbw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - 
- - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsadbw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpshufb %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpshufb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpshufd $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpshufd $1, (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpshufhw $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpshufhw $1, (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpshuflw $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpshuflw $1, (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vpsignb %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsignb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vpsignd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsignd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vpsignw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsignw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpslld $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpslld %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpslld (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpslldq $1, 
%ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsllq $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsllq %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsllq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsllvd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsllvd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsllvd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsllvd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsllvq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsllvq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsllvq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsllvq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsllw $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsllw %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsllw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsrad $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsrad %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsrad (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsravd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsravd (%rax), %xmm1, %xmm2 
+# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsravd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsravd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsraw $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsraw %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsraw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsrld $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsrld %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsrld (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsrldq $1, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsrlq $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsrlq %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsrlq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsrlvd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsrlvd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsrlvd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsrlvd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsrlvq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsrlvq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsrlvq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 
0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsrlvq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsrlw $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpsrlw %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsrlw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpsubb %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsubb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpsubd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsubd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpsubq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsubq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vpsubsb %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsubsb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vpsubsw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsubsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vpsubusb %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsubusb (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - vpsubusw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsubusw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 
- - - - - - - - - - - vpsubw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsubw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpunpckhbw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpunpckhbw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpunpckhdq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpunpckhdq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpunpckhqdq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpunpckhqdq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpunpckhwd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpunpckhwd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpunpcklbw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpunpcklbw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpunpckldq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpunpckldq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpunpcklqdq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpunpcklqdq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpunpcklwd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpunpcklwd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - 
- - - - - - - - - - vpxor %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpxor (%rax), %ymm1, %ymm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi1.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi1.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi1.s @@ -0,0 +1,135 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -instruction-tables < %s | FileCheck %s + +andn %eax, %ebx, %ecx +andn (%rax), %ebx, %ecx + +andn %rax, %rbx, %rcx +andn (%rax), %rbx, %rcx + +bextr %eax, %ebx, %ecx +bextr %eax, (%rbx), %ecx + +bextr %rax, %rbx, %rcx +bextr %rax, (%rbx), %rcx + +blsi %eax, %ecx +blsi (%rax), %ecx + +blsi %rax, %rcx +blsi (%rax), %rcx + +blsmsk %eax, %ecx +blsmsk (%rax), %ecx + +blsmsk %rax, %rcx +blsmsk (%rax), %rcx + +blsr %eax, %ecx +blsr (%rax), %ecx + +blsr %rax, %rcx +blsr (%rax), %rcx + +tzcnt %ax, %cx +tzcnt (%rax), %cx + +tzcnt %eax, %ecx +tzcnt (%rax), %ecx + +tzcnt %rax, %rcx +tzcnt (%rax), %rcx + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 0.25 andnl %eax, %ebx, %ecx +# CHECK-NEXT: 1 5 0.33 * andnl (%rax), %ebx, %ecx +# CHECK-NEXT: 1 1 0.25 andnq %rax, %rbx, %rcx +# CHECK-NEXT: 1 5 0.33 * andnq (%rax), %rbx, %rcx +# CHECK-NEXT: 1 1 0.50 bextrl %eax, %ebx, %ecx +# CHECK-NEXT: 2 5 0.50 * bextrl %eax, (%rbx), %ecx +# CHECK-NEXT: 1 1 0.50 bextrq %rax, %rbx, %rcx +# CHECK-NEXT: 2 5 0.50 * bextrq %rax, (%rbx), %rcx +# CHECK-NEXT: 2 2 0.50 blsil %eax, %ecx +# CHECK-NEXT: 3 6 0.50 * blsil (%rax), %ecx +# CHECK-NEXT: 2 2 0.50 blsiq %rax, %rcx +# CHECK-NEXT: 3 6 0.50 * blsiq (%rax), %rcx +# CHECK-NEXT: 2 2 0.50 
blsmskl %eax, %ecx +# CHECK-NEXT: 3 6 0.50 * blsmskl (%rax), %ecx +# CHECK-NEXT: 2 2 0.50 blsmskq %rax, %rcx +# CHECK-NEXT: 3 6 0.50 * blsmskq (%rax), %rcx +# CHECK-NEXT: 2 2 0.50 blsrl %eax, %ecx +# CHECK-NEXT: 3 6 0.50 * blsrl (%rax), %ecx +# CHECK-NEXT: 2 2 0.50 blsrq %rax, %rcx +# CHECK-NEXT: 3 6 0.50 * blsrq (%rax), %rcx +# CHECK-NEXT: 2 2 1.00 tzcntw %ax, %cx +# CHECK-NEXT: 2 6 0.50 * tzcntw (%rax), %cx +# CHECK-NEXT: 2 2 0.50 tzcntl %eax, %ecx +# CHECK-NEXT: 2 6 0.50 * tzcntl (%rax), %ecx +# CHECK-NEXT: 2 2 0.50 tzcntq %rax, %rcx +# CHECK-NEXT: 2 6 0.50 * tzcntq (%rax), %rcx + +# CHECK: Resources: +# CHECK-NEXT: [0] - Zn3AGU0 +# CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: [8] - Zn3FPP0 +# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: [14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# CHECK-NEXT: [15.0] - Zn3Load +# CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: [16.0] - Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: 4.33 4.33 4.33 8.00 12.50 12.50 8.00 - - - - - - - - 4.33 4.33 4.33 4.33 4.33 4.33 - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - andnl %eax, %ebx, %ecx +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - 
andnl (%rax), %ebx, %ecx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - andnq %rax, %rbx, %rcx +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - andnq (%rax), %rbx, %rcx +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - bextrl %eax, %ebx, %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - 0.50 0.50 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - bextrl %eax, (%rbx), %ecx +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - bextrq %rax, %rbx, %rcx +# CHECK-NEXT: 0.33 0.33 0.33 - 0.50 0.50 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - bextrq %rax, (%rbx), %rcx +# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - - - - - - blsil %eax, %ecx +# CHECK-NEXT: 0.33 0.33 0.33 0.50 0.50 0.50 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - blsil (%rax), %ecx +# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - - - - - - blsiq %rax, %rcx +# CHECK-NEXT: 0.33 0.33 0.33 0.50 0.50 0.50 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - blsiq (%rax), %rcx +# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - - - - - - blsmskl %eax, %ecx +# CHECK-NEXT: 0.33 0.33 0.33 0.50 0.50 0.50 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - blsmskl (%rax), %ecx +# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - - - - - - blsmskq %rax, %rcx +# CHECK-NEXT: 0.33 0.33 0.33 0.50 0.50 0.50 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - blsmskq (%rax), %rcx +# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - - - - - - blsrl %eax, %ecx +# CHECK-NEXT: 0.33 0.33 0.33 0.50 0.50 0.50 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - blsrl (%rax), %ecx +# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - - - - - - blsrq %rax, %rcx +# CHECK-NEXT: 0.33 0.33 0.33 0.50 0.50 0.50 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - blsrq (%rax), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - 
- - tzcntw %ax, %cx +# CHECK-NEXT: 0.33 0.33 0.33 - 0.50 0.50 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - tzcntw (%rax), %cx +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - tzcntl %eax, %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - 0.50 0.50 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - tzcntl (%rax), %ecx +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - tzcntq %rax, %rcx +# CHECK-NEXT: 0.33 0.33 0.33 - 0.50 0.50 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - tzcntq (%rax), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi2.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi2.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi2.s @@ -0,0 +1,156 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -instruction-tables < %s | FileCheck %s + +bzhi %eax, %ebx, %ecx +bzhi %eax, (%rbx), %ecx + +bzhi %rax, %rbx, %rcx +bzhi %rax, (%rbx), %rcx + +mulx %eax, %ebx, %ecx +mulx (%rax), %ebx, %ecx + +mulx %rax, %rbx, %rcx +mulx (%rax), %rbx, %rcx + +pdep %eax, %ebx, %ecx +pdep (%rax), %ebx, %ecx + +pdep %rax, %rbx, %rcx +pdep (%rax), %rbx, %rcx + +pext %eax, %ebx, %ecx +pext (%rax), %ebx, %ecx + +pext %rax, %rbx, %rcx +pext (%rax), %rbx, %rcx + +rorx $1, %eax, %ecx +rorx $1, (%rax), %ecx + +rorx $1, %rax, %rcx +rorx $1, (%rax), %rcx + +sarx %eax, %ebx, %ecx +sarx %eax, (%rbx), %ecx + +sarx %rax, %rbx, %rcx +sarx %rax, (%rbx), %rcx + +shlx %eax, %ebx, %ecx +shlx %eax, (%rbx), %ecx + +shlx %rax, %rbx, %rcx +shlx %rax, (%rbx), %rcx + +shrx %eax, %ebx, %ecx +shrx %eax, (%rbx), %ecx + +shrx %rax, %rbx, %rcx +shrx %rax, (%rbx), %rcx + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: 
+# CHECK-NEXT: 1 1 0.50 bzhil %eax, %ebx, %ecx +# CHECK-NEXT: 2 5 0.50 * bzhil %eax, (%rbx), %ecx +# CHECK-NEXT: 1 1 0.50 bzhiq %rax, %rbx, %rcx +# CHECK-NEXT: 2 5 0.50 * bzhiq %rax, (%rbx), %rcx +# CHECK-NEXT: 2 3 1.00 mulxl %eax, %ebx, %ecx +# CHECK-NEXT: 2 7 2.00 * mulxl (%rax), %ebx, %ecx +# CHECK-NEXT: 2 4 1.00 mulxq %rax, %rbx, %rcx +# CHECK-NEXT: 2 8 2.00 * mulxq (%rax), %rbx, %rcx +# CHECK-NEXT: 1 3 1.00 pdepl %eax, %ebx, %ecx +# CHECK-NEXT: 1 5 0.33 * pdepl (%rax), %ebx, %ecx +# CHECK-NEXT: 1 3 1.00 pdepq %rax, %rbx, %rcx +# CHECK-NEXT: 1 5 0.33 * pdepq (%rax), %rbx, %rcx +# CHECK-NEXT: 1 3 1.00 pextl %eax, %ebx, %ecx +# CHECK-NEXT: 1 5 0.33 * pextl (%rax), %ebx, %ecx +# CHECK-NEXT: 1 3 1.00 pextq %rax, %rbx, %rcx +# CHECK-NEXT: 1 5 0.33 * pextq (%rax), %rbx, %rcx +# CHECK-NEXT: 1 1 0.50 rorxl $1, %eax, %ecx +# CHECK-NEXT: 2 5 0.50 * rorxl $1, (%rax), %ecx +# CHECK-NEXT: 1 1 0.50 rorxq $1, %rax, %rcx +# CHECK-NEXT: 2 5 0.50 * rorxq $1, (%rax), %rcx +# CHECK-NEXT: 1 1 0.50 sarxl %eax, %ebx, %ecx +# CHECK-NEXT: 2 5 0.50 * sarxl %eax, (%rbx), %ecx +# CHECK-NEXT: 1 1 0.50 sarxq %rax, %rbx, %rcx +# CHECK-NEXT: 2 5 0.50 * sarxq %rax, (%rbx), %rcx +# CHECK-NEXT: 1 1 0.50 shlxl %eax, %ebx, %ecx +# CHECK-NEXT: 2 5 0.50 * shlxl %eax, (%rbx), %ecx +# CHECK-NEXT: 1 1 0.50 shlxq %rax, %rbx, %rcx +# CHECK-NEXT: 2 5 0.50 * shlxq %rax, (%rbx), %rcx +# CHECK-NEXT: 1 1 0.50 shrxl %eax, %ebx, %ecx +# CHECK-NEXT: 2 5 0.50 * shrxl %eax, (%rbx), %ecx +# CHECK-NEXT: 1 1 0.50 shrxq %rax, %rbx, %rcx +# CHECK-NEXT: 2 5 0.50 * shrxq %rax, (%rbx), %rcx + +# CHECK: Resources: +# CHECK-NEXT: [0] - Zn3AGU0 +# CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: [8] - Zn3FPP0 +# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 
+# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: [14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# CHECK-NEXT: [15.0] - Zn3Load +# CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: [16.0] - Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: 5.33 5.33 5.33 1.00 21.00 11.00 1.00 - - - - - - - - 5.33 5.33 5.33 5.33 5.33 5.33 - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - bzhil %eax, %ebx, %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - 0.50 0.50 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - bzhil %eax, (%rbx), %ecx +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - bzhiq %rax, %rbx, %rcx +# CHECK-NEXT: 0.33 0.33 0.33 - 0.50 0.50 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - bzhiq %rax, (%rbx), %rcx +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - - - - - - - mulxl %eax, %ebx, %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - 2.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - mulxl (%rax), %ebx, %ecx +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - - - - - - - mulxq %rax, %rbx, %rcx +# CHECK-NEXT: 0.33 0.33 0.33 - 2.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - mulxq (%rax), %rbx, %rcx +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - - - - - - - pdepl %eax, %ebx, %ecx +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - pdepl (%rax), %ebx, %ecx +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - - - - - - - pdepq %rax, %rbx, %rcx +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - 
pdepq (%rax), %rbx, %rcx +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - - - - - - - pextl %eax, %ebx, %ecx +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - pextl (%rax), %ebx, %ecx +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - - - - - - - pextq %rax, %rbx, %rcx +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - pextq (%rax), %rbx, %rcx +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - rorxl $1, %eax, %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - 0.50 0.50 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - rorxl $1, (%rax), %ecx +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - rorxq $1, %rax, %rcx +# CHECK-NEXT: 0.33 0.33 0.33 - 0.50 0.50 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - rorxq $1, (%rax), %rcx +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - sarxl %eax, %ebx, %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - 0.50 0.50 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - sarxl %eax, (%rbx), %ecx +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - sarxq %rax, %rbx, %rcx +# CHECK-NEXT: 0.33 0.33 0.33 - 0.50 0.50 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - sarxq %rax, (%rbx), %rcx +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - shlxl %eax, %ebx, %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - 0.50 0.50 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - shlxl %eax, (%rbx), %ecx +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - shlxq %rax, %rbx, %rcx +# CHECK-NEXT: 0.33 0.33 0.33 - 0.50 0.50 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - shlxq %rax, (%rbx), %rcx +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - shrxl %eax, %ebx, %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - 0.50 0.50 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - shrxl %eax, (%rbx), %ecx +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - shrxq %rax, %rbx, %rcx +# 
CHECK-NEXT: 0.33 0.33 0.33 - 0.50 0.50 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - shrxq %rax, (%rbx), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-clflushopt.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-clflushopt.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-clflushopt.s @@ -0,0 +1,48 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -instruction-tables < %s | FileCheck %s + +clflushopt (%rax) + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 5 0.33 * * U clflushopt (%rax) + +# CHECK: Resources: +# CHECK-NEXT: [0] - Zn3AGU0 +# CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: [8] - Zn3FPP0 +# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: [14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# CHECK-NEXT: [15.0] - Zn3Load +# CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: [16.0] - Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] 
[13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - clflushopt (%rax) diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-clzero.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-clzero.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-clzero.s @@ -0,0 +1,48 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -instruction-tables < %s | FileCheck %s + +clzero + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 5 0.33 U clzero + +# CHECK: Resources: +# CHECK-NEXT: [0] - Zn3AGU0 +# CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: [8] - Zn3FPP0 +# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: [14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# CHECK-NEXT: [15.0] - Zn3Load +# CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: [16.0] - Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] 
[9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - clzero diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-cmov.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-cmov.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-cmov.s @@ -0,0 +1,338 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -instruction-tables < %s | FileCheck %s + +cmovow %si, %di +cmovnow %si, %di +cmovbw %si, %di +cmovaew %si, %di +cmovew %si, %di +cmovnew %si, %di +cmovbew %si, %di +cmovaw %si, %di +cmovsw %si, %di +cmovnsw %si, %di +cmovpw %si, %di +cmovnpw %si, %di +cmovlw %si, %di +cmovgew %si, %di +cmovlew %si, %di +cmovgw %si, %di + +cmovow (%rax), %di +cmovnow (%rax), %di +cmovbw (%rax), %di +cmovaew (%rax), %di +cmovew (%rax), %di +cmovnew (%rax), %di +cmovbew (%rax), %di +cmovaw (%rax), %di +cmovsw (%rax), %di +cmovnsw (%rax), %di +cmovpw (%rax), %di +cmovnpw (%rax), %di +cmovlw (%rax), %di +cmovgew (%rax), %di +cmovlew (%rax), %di +cmovgw (%rax), %di + +cmovol %esi, %edi +cmovnol %esi, %edi +cmovbl %esi, %edi +cmovael %esi, %edi +cmovel %esi, %edi +cmovnel %esi, %edi +cmovbel %esi, %edi +cmoval %esi, %edi +cmovsl %esi, %edi +cmovnsl %esi, %edi +cmovpl %esi, %edi +cmovnpl %esi, %edi +cmovll %esi, %edi +cmovgel %esi, %edi +cmovlel %esi, %edi +cmovgl %esi, %edi + +cmovol (%rax), %edi +cmovnol (%rax), %edi +cmovbl (%rax), %edi +cmovael (%rax), %edi +cmovel (%rax), %edi +cmovnel (%rax), %edi +cmovbel (%rax), %edi +cmoval (%rax), %edi +cmovsl (%rax), %edi +cmovnsl (%rax), %edi +cmovpl (%rax), %edi +cmovnpl (%rax), %edi +cmovll (%rax), %edi +cmovgel (%rax), %edi +cmovlel (%rax), %edi +cmovgl (%rax), %edi + +cmovoq %rsi, %rdi +cmovnoq %rsi, %rdi +cmovbq %rsi, %rdi +cmovaeq %rsi, %rdi +cmoveq %rsi, %rdi +cmovneq %rsi, 
%rdi +cmovbeq %rsi, %rdi +cmovaq %rsi, %rdi +cmovsq %rsi, %rdi +cmovnsq %rsi, %rdi +cmovpq %rsi, %rdi +cmovnpq %rsi, %rdi +cmovlq %rsi, %rdi +cmovgeq %rsi, %rdi +cmovleq %rsi, %rdi +cmovgq %rsi, %rdi + +cmovoq (%rax), %rdi +cmovnoq (%rax), %rdi +cmovbq (%rax), %rdi +cmovaeq (%rax), %rdi +cmoveq (%rax), %rdi +cmovneq (%rax), %rdi +cmovbeq (%rax), %rdi +cmovaq (%rax), %rdi +cmovsq (%rax), %rdi +cmovnsq (%rax), %rdi +cmovpq (%rax), %rdi +cmovnpq (%rax), %rdi +cmovlq (%rax), %rdi +cmovgeq (%rax), %rdi +cmovleq (%rax), %rdi +cmovgq (%rax), %rdi + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 0.50 cmovow %si, %di +# CHECK-NEXT: 1 1 0.50 cmovnow %si, %di +# CHECK-NEXT: 1 1 0.50 cmovbw %si, %di +# CHECK-NEXT: 1 1 0.50 cmovaew %si, %di +# CHECK-NEXT: 1 1 0.50 cmovew %si, %di +# CHECK-NEXT: 1 1 0.50 cmovnew %si, %di +# CHECK-NEXT: 1 1 0.50 cmovbew %si, %di +# CHECK-NEXT: 1 1 0.50 cmovaw %si, %di +# CHECK-NEXT: 1 1 0.50 cmovsw %si, %di +# CHECK-NEXT: 1 1 0.50 cmovnsw %si, %di +# CHECK-NEXT: 1 1 0.50 cmovpw %si, %di +# CHECK-NEXT: 1 1 0.50 cmovnpw %si, %di +# CHECK-NEXT: 1 1 0.50 cmovlw %si, %di +# CHECK-NEXT: 1 1 0.50 cmovgew %si, %di +# CHECK-NEXT: 1 1 0.50 cmovlew %si, %di +# CHECK-NEXT: 1 1 0.50 cmovgw %si, %di +# CHECK-NEXT: 1 5 0.50 * cmovow (%rax), %di +# CHECK-NEXT: 1 5 0.50 * cmovnow (%rax), %di +# CHECK-NEXT: 1 5 0.50 * cmovbw (%rax), %di +# CHECK-NEXT: 1 5 0.50 * cmovaew (%rax), %di +# CHECK-NEXT: 1 5 0.50 * cmovew (%rax), %di +# CHECK-NEXT: 1 5 0.50 * cmovnew (%rax), %di +# CHECK-NEXT: 1 5 0.50 * cmovbew (%rax), %di +# CHECK-NEXT: 1 5 0.50 * cmovaw (%rax), %di +# CHECK-NEXT: 1 5 0.50 * cmovsw (%rax), %di +# CHECK-NEXT: 1 5 0.50 * cmovnsw (%rax), %di +# CHECK-NEXT: 1 5 0.50 * cmovpw (%rax), %di +# CHECK-NEXT: 1 5 0.50 * cmovnpw (%rax), 
%di +# CHECK-NEXT: 1 5 0.50 * cmovlw (%rax), %di +# CHECK-NEXT: 1 5 0.50 * cmovgew (%rax), %di +# CHECK-NEXT: 1 5 0.50 * cmovlew (%rax), %di +# CHECK-NEXT: 1 5 0.50 * cmovgw (%rax), %di +# CHECK-NEXT: 1 1 0.50 cmovol %esi, %edi +# CHECK-NEXT: 1 1 0.50 cmovnol %esi, %edi +# CHECK-NEXT: 1 1 0.50 cmovbl %esi, %edi +# CHECK-NEXT: 1 1 0.50 cmovael %esi, %edi +# CHECK-NEXT: 1 1 0.50 cmovel %esi, %edi +# CHECK-NEXT: 1 1 0.50 cmovnel %esi, %edi +# CHECK-NEXT: 1 1 0.50 cmovbel %esi, %edi +# CHECK-NEXT: 1 1 0.50 cmoval %esi, %edi +# CHECK-NEXT: 1 1 0.50 cmovsl %esi, %edi +# CHECK-NEXT: 1 1 0.50 cmovnsl %esi, %edi +# CHECK-NEXT: 1 1 0.50 cmovpl %esi, %edi +# CHECK-NEXT: 1 1 0.50 cmovnpl %esi, %edi +# CHECK-NEXT: 1 1 0.50 cmovll %esi, %edi +# CHECK-NEXT: 1 1 0.50 cmovgel %esi, %edi +# CHECK-NEXT: 1 1 0.50 cmovlel %esi, %edi +# CHECK-NEXT: 1 1 0.50 cmovgl %esi, %edi +# CHECK-NEXT: 1 5 0.50 * cmovol (%rax), %edi +# CHECK-NEXT: 1 5 0.50 * cmovnol (%rax), %edi +# CHECK-NEXT: 1 5 0.50 * cmovbl (%rax), %edi +# CHECK-NEXT: 1 5 0.50 * cmovael (%rax), %edi +# CHECK-NEXT: 1 5 0.50 * cmovel (%rax), %edi +# CHECK-NEXT: 1 5 0.50 * cmovnel (%rax), %edi +# CHECK-NEXT: 1 5 0.50 * cmovbel (%rax), %edi +# CHECK-NEXT: 1 5 0.50 * cmoval (%rax), %edi +# CHECK-NEXT: 1 5 0.50 * cmovsl (%rax), %edi +# CHECK-NEXT: 1 5 0.50 * cmovnsl (%rax), %edi +# CHECK-NEXT: 1 5 0.50 * cmovpl (%rax), %edi +# CHECK-NEXT: 1 5 0.50 * cmovnpl (%rax), %edi +# CHECK-NEXT: 1 5 0.50 * cmovll (%rax), %edi +# CHECK-NEXT: 1 5 0.50 * cmovgel (%rax), %edi +# CHECK-NEXT: 1 5 0.50 * cmovlel (%rax), %edi +# CHECK-NEXT: 1 5 0.50 * cmovgl (%rax), %edi +# CHECK-NEXT: 1 1 0.50 cmovoq %rsi, %rdi +# CHECK-NEXT: 1 1 0.50 cmovnoq %rsi, %rdi +# CHECK-NEXT: 1 1 0.50 cmovbq %rsi, %rdi +# CHECK-NEXT: 1 1 0.50 cmovaeq %rsi, %rdi +# CHECK-NEXT: 1 1 0.50 cmoveq %rsi, %rdi +# CHECK-NEXT: 1 1 0.50 cmovneq %rsi, %rdi +# CHECK-NEXT: 1 1 0.50 cmovbeq %rsi, %rdi +# CHECK-NEXT: 1 1 0.50 cmovaq %rsi, %rdi +# CHECK-NEXT: 1 1 0.50 cmovsq %rsi, %rdi +# 
CHECK-NEXT: 1 1 0.50 cmovnsq %rsi, %rdi +# CHECK-NEXT: 1 1 0.50 cmovpq %rsi, %rdi +# CHECK-NEXT: 1 1 0.50 cmovnpq %rsi, %rdi +# CHECK-NEXT: 1 1 0.50 cmovlq %rsi, %rdi +# CHECK-NEXT: 1 1 0.50 cmovgeq %rsi, %rdi +# CHECK-NEXT: 1 1 0.50 cmovleq %rsi, %rdi +# CHECK-NEXT: 1 1 0.50 cmovgq %rsi, %rdi +# CHECK-NEXT: 1 5 0.50 * cmovoq (%rax), %rdi +# CHECK-NEXT: 1 5 0.50 * cmovnoq (%rax), %rdi +# CHECK-NEXT: 1 5 0.50 * cmovbq (%rax), %rdi +# CHECK-NEXT: 1 5 0.50 * cmovaeq (%rax), %rdi +# CHECK-NEXT: 1 5 0.50 * cmoveq (%rax), %rdi +# CHECK-NEXT: 1 5 0.50 * cmovneq (%rax), %rdi +# CHECK-NEXT: 1 5 0.50 * cmovbeq (%rax), %rdi +# CHECK-NEXT: 1 5 0.50 * cmovaq (%rax), %rdi +# CHECK-NEXT: 1 5 0.50 * cmovsq (%rax), %rdi +# CHECK-NEXT: 1 5 0.50 * cmovnsq (%rax), %rdi +# CHECK-NEXT: 1 5 0.50 * cmovpq (%rax), %rdi +# CHECK-NEXT: 1 5 0.50 * cmovnpq (%rax), %rdi +# CHECK-NEXT: 1 5 0.50 * cmovlq (%rax), %rdi +# CHECK-NEXT: 1 5 0.50 * cmovgeq (%rax), %rdi +# CHECK-NEXT: 1 5 0.50 * cmovleq (%rax), %rdi +# CHECK-NEXT: 1 5 0.50 * cmovgq (%rax), %rdi + +# CHECK: Resources: +# CHECK-NEXT: [0] - Zn3AGU0 +# CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: [8] - Zn3FPP0 +# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: [14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# CHECK-NEXT: [15.0] - Zn3Load +# CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: [16.0] - Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: 16.00 16.00 16.00 48.00 - - 48.00 - 
- - - - - - - 16.00 16.00 16.00 16.00 16.00 16.00 - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovow %si, %di +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovnow %si, %di +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovbw %si, %di +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovaew %si, %di +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovew %si, %di +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovnew %si, %di +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovbew %si, %di +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovaw %si, %di +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovsw %si, %di +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovnsw %si, %di +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovpw %si, %di +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovnpw %si, %di +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovlw %si, %di +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovgew %si, %di +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovlew %si, %di +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovgw %si, %di +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovow (%rax), %di +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovnow (%rax), %di +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovbw (%rax), %di +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 
0.33 0.33 0.33 0.33 - - cmovaew (%rax), %di +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovew (%rax), %di +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovnew (%rax), %di +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovbew (%rax), %di +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovaw (%rax), %di +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovsw (%rax), %di +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovnsw (%rax), %di +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovpw (%rax), %di +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovnpw (%rax), %di +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovlw (%rax), %di +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovgew (%rax), %di +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovlew (%rax), %di +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovgw (%rax), %di +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovol %esi, %edi +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovnol %esi, %edi +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovbl %esi, %edi +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovael %esi, %edi +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovel %esi, %edi +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovnel %esi, %edi +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovbel %esi, 
%edi +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmoval %esi, %edi +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovsl %esi, %edi +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovnsl %esi, %edi +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovpl %esi, %edi +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovnpl %esi, %edi +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovll %esi, %edi +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovgel %esi, %edi +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovlel %esi, %edi +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovgl %esi, %edi +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovol (%rax), %edi +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovnol (%rax), %edi +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovbl (%rax), %edi +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovael (%rax), %edi +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovel (%rax), %edi +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovnel (%rax), %edi +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovbel (%rax), %edi +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmoval (%rax), %edi +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovsl (%rax), %edi +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovnsl (%rax), %edi +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 
0.33 0.33 - - cmovpl (%rax), %edi +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovnpl (%rax), %edi +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovll (%rax), %edi +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovgel (%rax), %edi +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovlel (%rax), %edi +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovgl (%rax), %edi +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovoq %rsi, %rdi +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovnoq %rsi, %rdi +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovbq %rsi, %rdi +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovaeq %rsi, %rdi +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmoveq %rsi, %rdi +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovneq %rsi, %rdi +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovbeq %rsi, %rdi +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovaq %rsi, %rdi +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovsq %rsi, %rdi +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovnsq %rsi, %rdi +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovpq %rsi, %rdi +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovnpq %rsi, %rdi +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovlq %rsi, %rdi +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovgeq %rsi, %rdi +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovleq %rsi, %rdi +# CHECK-NEXT: - - - 0.50 - - 0.50 - - - - - - - - - - - - - - - - cmovgq %rsi, %rdi +# CHECK-NEXT: 0.33 0.33 
0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovoq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovnoq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovbq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovaeq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmoveq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovneq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovbeq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovaq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovsq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovnsq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovpq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovnpq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovlq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovgeq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovleq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 0.50 - - 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmovgq (%rax), %rdi diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-cmpxchg.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-cmpxchg.s new file mode 100644 --- /dev/null +++ 
b/llvm/test/tools/llvm-mca/X86/Znver3/resources-cmpxchg.s @@ -0,0 +1,51 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -instruction-tables < %s | FileCheck %s + +cmpxchg8b (%rax) +cmpxchg16b (%rax) + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 19 3 6.00 * * cmpxchg8b (%rax) +# CHECK-NEXT: 28 4 14.75 * * cmpxchg16b (%rax) + +# CHECK: Resources: +# CHECK-NEXT: [0] - Zn3AGU0 +# CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: [8] - Zn3FPP0 +# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: [14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# CHECK-NEXT: [15.0] - Zn3Load +# CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: [16.0] - Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: - - - 20.75 20.75 20.75 20.75 - - - - - - - - - - - - - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: +# CHECK-NEXT: - - - 6.00 6.00 6.00 6.00 - - - - - - - - - - - - - - - - cmpxchg8b (%rax) +# CHECK-NEXT: - - - 14.75 14.75 14.75 14.75 - - - - - - - - - - - - - - 
- - cmpxchg16b (%rax) diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-f16c.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-f16c.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-f16c.s @@ -0,0 +1,72 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -instruction-tables < %s | FileCheck %s + +vcvtph2ps %xmm0, %xmm2 +vcvtph2ps (%rax), %xmm2 + +vcvtph2ps %xmm0, %ymm2 +vcvtph2ps (%rax), %ymm2 + +vcvtps2ph $0, %xmm0, %xmm2 +vcvtps2ph $0, %xmm0, (%rax) + +vcvtps2ph $0, %ymm0, %xmm2 +vcvtps2ph $0, %ymm0, (%rax) + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 0.50 vcvtph2ps %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vcvtph2ps (%rax), %xmm2 +# CHECK-NEXT: 2 4 1.00 vcvtph2ps %xmm0, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vcvtph2ps (%rax), %ymm2 +# CHECK-NEXT: 1 3 1.00 vcvtps2ph $0, %xmm0, %xmm2 +# CHECK-NEXT: 2 4 1.00 * vcvtps2ph $0, %xmm0, (%rax) +# CHECK-NEXT: 2 6 1.00 vcvtps2ph $0, %ymm0, %xmm2 +# CHECK-NEXT: 3 7 1.00 * vcvtps2ph $0, %ymm0, (%rax) + +# CHECK: Resources: +# CHECK-NEXT: [0] - Zn3AGU0 +# CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: [8] - Zn3FPP0 +# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: [14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# CHECK-NEXT: [15.0] - Zn3Load +# CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: 
[16.0] - Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: - - - - - - - - - - 6.50 6.50 2.00 2.00 2.00 1.33 1.33 1.33 0.67 0.67 0.67 1.00 1.00 + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - vcvtph2ps %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcvtph2ps (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - vcvtph2ps %xmm0, %ymm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vcvtph2ps (%rax), %ymm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - vcvtps2ph $0, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 vcvtps2ph $0, %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - vcvtps2ph $0, %ymm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 vcvtps2ph $0, %ymm0, (%rax) diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-fma.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-fma.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-fma.s @@ -0,0 +1,716 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -instruction-tables < %s | FileCheck %s + +vfmadd132pd %xmm0, %xmm1, %xmm2 +vfmadd132pd (%rax), %xmm1, %xmm2 + +vfmadd132pd %ymm0, %ymm1, %ymm2 +vfmadd132pd (%rax), %ymm1, %ymm2 + +vfmadd213pd %xmm0, %xmm1, %xmm2 +vfmadd213pd (%rax), %xmm1, %xmm2 + 
+vfmadd213pd %ymm0, %ymm1, %ymm2 +vfmadd213pd (%rax), %ymm1, %ymm2 + +vfmadd231pd %xmm0, %xmm1, %xmm2 +vfmadd231pd (%rax), %xmm1, %xmm2 + +vfmadd231pd %ymm0, %ymm1, %ymm2 +vfmadd231pd (%rax), %ymm1, %ymm2 + +vfmadd132ps %xmm0, %xmm1, %xmm2 +vfmadd132ps (%rax), %xmm1, %xmm2 + +vfmadd132ps %ymm0, %ymm1, %ymm2 +vfmadd132ps (%rax), %ymm1, %ymm2 + +vfmadd213ps %xmm0, %xmm1, %xmm2 +vfmadd213ps (%rax), %xmm1, %xmm2 + +vfmadd213ps %ymm0, %ymm1, %ymm2 +vfmadd213ps (%rax), %ymm1, %ymm2 + +vfmadd231ps %xmm0, %xmm1, %xmm2 +vfmadd231ps (%rax), %xmm1, %xmm2 + +vfmadd231ps %ymm0, %ymm1, %ymm2 +vfmadd231ps (%rax), %ymm1, %ymm2 + +vfmadd132sd %xmm0, %xmm1, %xmm2 +vfmadd132sd (%rax), %xmm1, %xmm2 + +vfmadd213sd %xmm0, %xmm1, %xmm2 +vfmadd213sd (%rax), %xmm1, %xmm2 + +vfmadd231sd %xmm0, %xmm1, %xmm2 +vfmadd231sd (%rax), %xmm1, %xmm2 + +vfmadd132ss %xmm0, %xmm1, %xmm2 +vfmadd132ss (%rax), %xmm1, %xmm2 + +vfmadd213ss %xmm0, %xmm1, %xmm2 +vfmadd213ss (%rax), %xmm1, %xmm2 + +vfmadd231ss %xmm0, %xmm1, %xmm2 +vfmadd231ss (%rax), %xmm1, %xmm2 + +vfmaddsub132pd %xmm0, %xmm1, %xmm2 +vfmaddsub132pd (%rax), %xmm1, %xmm2 + +vfmaddsub132pd %ymm0, %ymm1, %ymm2 +vfmaddsub132pd (%rax), %ymm1, %ymm2 + +vfmaddsub213pd %xmm0, %xmm1, %xmm2 +vfmaddsub213pd (%rax), %xmm1, %xmm2 + +vfmaddsub213pd %ymm0, %ymm1, %ymm2 +vfmaddsub213pd (%rax), %ymm1, %ymm2 + +vfmaddsub231pd %xmm0, %xmm1, %xmm2 +vfmaddsub231pd (%rax), %xmm1, %xmm2 + +vfmaddsub231pd %ymm0, %ymm1, %ymm2 +vfmaddsub231pd (%rax), %ymm1, %ymm2 + +vfmaddsub132ps %xmm0, %xmm1, %xmm2 +vfmaddsub132ps (%rax), %xmm1, %xmm2 + +vfmaddsub132ps %ymm0, %ymm1, %ymm2 +vfmaddsub132ps (%rax), %ymm1, %ymm2 + +vfmaddsub213ps %xmm0, %xmm1, %xmm2 +vfmaddsub213ps (%rax), %xmm1, %xmm2 + +vfmaddsub213ps %ymm0, %ymm1, %ymm2 +vfmaddsub213ps (%rax), %ymm1, %ymm2 + +vfmaddsub231ps %xmm0, %xmm1, %xmm2 +vfmaddsub231ps (%rax), %xmm1, %xmm2 + +vfmaddsub231ps %ymm0, %ymm1, %ymm2 +vfmaddsub231ps (%rax), %ymm1, %ymm2 + +vfmsub132pd %xmm0, %xmm1, %xmm2 +vfmsub132pd (%rax), %xmm1, 
%xmm2 + +vfmsub132pd %ymm0, %ymm1, %ymm2 +vfmsub132pd (%rax), %ymm1, %ymm2 + +vfmsub213pd %xmm0, %xmm1, %xmm2 +vfmsub213pd (%rax), %xmm1, %xmm2 + +vfmsub213pd %ymm0, %ymm1, %ymm2 +vfmsub213pd (%rax), %ymm1, %ymm2 + +vfmsub231pd %xmm0, %xmm1, %xmm2 +vfmsub231pd (%rax), %xmm1, %xmm2 + +vfmsub231pd %ymm0, %ymm1, %ymm2 +vfmsub231pd (%rax), %ymm1, %ymm2 + +vfmsub132ps %xmm0, %xmm1, %xmm2 +vfmsub132ps (%rax), %xmm1, %xmm2 + +vfmsub132ps %ymm0, %ymm1, %ymm2 +vfmsub132ps (%rax), %ymm1, %ymm2 + +vfmsub213ps %xmm0, %xmm1, %xmm2 +vfmsub213ps (%rax), %xmm1, %xmm2 + +vfmsub213ps %ymm0, %ymm1, %ymm2 +vfmsub213ps (%rax), %ymm1, %ymm2 + +vfmsub231ps %xmm0, %xmm1, %xmm2 +vfmsub231ps (%rax), %xmm1, %xmm2 + +vfmsub231ps %ymm0, %ymm1, %ymm2 +vfmsub231ps (%rax), %ymm1, %ymm2 + +vfmsub132sd %xmm0, %xmm1, %xmm2 +vfmsub132sd (%rax), %xmm1, %xmm2 + +vfmsub213sd %xmm0, %xmm1, %xmm2 +vfmsub213sd (%rax), %xmm1, %xmm2 + +vfmsub231sd %xmm0, %xmm1, %xmm2 +vfmsub231sd (%rax), %xmm1, %xmm2 + +vfmsub132ss %xmm0, %xmm1, %xmm2 +vfmsub132ss (%rax), %xmm1, %xmm2 + +vfmsub213ss %xmm0, %xmm1, %xmm2 +vfmsub213ss (%rax), %xmm1, %xmm2 + +vfmsub231ss %xmm0, %xmm1, %xmm2 +vfmsub231ss (%rax), %xmm1, %xmm2 + +vfmsubadd132pd %xmm0, %xmm1, %xmm2 +vfmsubadd132pd (%rax), %xmm1, %xmm2 + +vfmsubadd132pd %ymm0, %ymm1, %ymm2 +vfmsubadd132pd (%rax), %ymm1, %ymm2 + +vfmsubadd213pd %xmm0, %xmm1, %xmm2 +vfmsubadd213pd (%rax), %xmm1, %xmm2 + +vfmsubadd213pd %ymm0, %ymm1, %ymm2 +vfmsubadd213pd (%rax), %ymm1, %ymm2 + +vfmsubadd231pd %xmm0, %xmm1, %xmm2 +vfmsubadd231pd (%rax), %xmm1, %xmm2 + +vfmsubadd231pd %ymm0, %ymm1, %ymm2 +vfmsubadd231pd (%rax), %ymm1, %ymm2 + +vfmsubadd132ps %xmm0, %xmm1, %xmm2 +vfmsubadd132ps (%rax), %xmm1, %xmm2 + +vfmsubadd132ps %ymm0, %ymm1, %ymm2 +vfmsubadd132ps (%rax), %ymm1, %ymm2 + +vfmsubadd213ps %xmm0, %xmm1, %xmm2 +vfmsubadd213ps (%rax), %xmm1, %xmm2 + +vfmsubadd213ps %ymm0, %ymm1, %ymm2 +vfmsubadd213ps (%rax), %ymm1, %ymm2 + +vfmsubadd231ps %xmm0, %xmm1, %xmm2 +vfmsubadd231ps (%rax), %xmm1, 
%xmm2 + +vfmsubadd231ps %ymm0, %ymm1, %ymm2 +vfmsubadd231ps (%rax), %ymm1, %ymm2 + +vfnmadd132pd %xmm0, %xmm1, %xmm2 +vfnmadd132pd (%rax), %xmm1, %xmm2 + +vfnmadd132pd %ymm0, %ymm1, %ymm2 +vfnmadd132pd (%rax), %ymm1, %ymm2 + +vfnmadd213pd %xmm0, %xmm1, %xmm2 +vfnmadd213pd (%rax), %xmm1, %xmm2 + +vfnmadd213pd %ymm0, %ymm1, %ymm2 +vfnmadd213pd (%rax), %ymm1, %ymm2 + +vfnmadd231pd %xmm0, %xmm1, %xmm2 +vfnmadd231pd (%rax), %xmm1, %xmm2 + +vfnmadd231pd %ymm0, %ymm1, %ymm2 +vfnmadd231pd (%rax), %ymm1, %ymm2 + +vfnmadd132ps %xmm0, %xmm1, %xmm2 +vfnmadd132ps (%rax), %xmm1, %xmm2 + +vfnmadd132ps %ymm0, %ymm1, %ymm2 +vfnmadd132ps (%rax), %ymm1, %ymm2 + +vfnmadd213ps %xmm0, %xmm1, %xmm2 +vfnmadd213ps (%rax), %xmm1, %xmm2 + +vfnmadd213ps %ymm0, %ymm1, %ymm2 +vfnmadd213ps (%rax), %ymm1, %ymm2 + +vfnmadd231ps %xmm0, %xmm1, %xmm2 +vfnmadd231ps (%rax), %xmm1, %xmm2 + +vfnmadd231ps %ymm0, %ymm1, %ymm2 +vfnmadd231ps (%rax), %ymm1, %ymm2 + +vfnmadd132sd %xmm0, %xmm1, %xmm2 +vfnmadd132sd (%rax), %xmm1, %xmm2 + +vfnmadd213sd %xmm0, %xmm1, %xmm2 +vfnmadd213sd (%rax), %xmm1, %xmm2 + +vfnmadd231sd %xmm0, %xmm1, %xmm2 +vfnmadd231sd (%rax), %xmm1, %xmm2 + +vfnmadd132ss %xmm0, %xmm1, %xmm2 +vfnmadd132ss (%rax), %xmm1, %xmm2 + +vfnmadd213ss %xmm0, %xmm1, %xmm2 +vfnmadd213ss (%rax), %xmm1, %xmm2 + +vfnmadd231ss %xmm0, %xmm1, %xmm2 +vfnmadd231ss (%rax), %xmm1, %xmm2 + +vfnmsub132pd %xmm0, %xmm1, %xmm2 +vfnmsub132pd (%rax), %xmm1, %xmm2 + +vfnmsub132pd %ymm0, %ymm1, %ymm2 +vfnmsub132pd (%rax), %ymm1, %ymm2 + +vfnmsub213pd %xmm0, %xmm1, %xmm2 +vfnmsub213pd (%rax), %xmm1, %xmm2 + +vfnmsub213pd %ymm0, %ymm1, %ymm2 +vfnmsub213pd (%rax), %ymm1, %ymm2 + +vfnmsub231pd %xmm0, %xmm1, %xmm2 +vfnmsub231pd (%rax), %xmm1, %xmm2 + +vfnmsub231pd %ymm0, %ymm1, %ymm2 +vfnmsub231pd (%rax), %ymm1, %ymm2 + +vfnmsub132ps %xmm0, %xmm1, %xmm2 +vfnmsub132ps (%rax), %xmm1, %xmm2 + +vfnmsub132ps %ymm0, %ymm1, %ymm2 +vfnmsub132ps (%rax), %ymm1, %ymm2 + +vfnmsub213ps %xmm0, %xmm1, %xmm2 +vfnmsub213ps (%rax), %xmm1, %xmm2 + 
+vfnmsub213ps %ymm0, %ymm1, %ymm2 +vfnmsub213ps (%rax), %ymm1, %ymm2 + +vfnmsub231ps %xmm0, %xmm1, %xmm2 +vfnmsub231ps (%rax), %xmm1, %xmm2 + +vfnmsub231ps %ymm0, %ymm1, %ymm2 +vfnmsub231ps (%rax), %ymm1, %ymm2 + +vfnmsub132sd %xmm0, %xmm1, %xmm2 +vfnmsub132sd (%rax), %xmm1, %xmm2 + +vfnmsub213sd %xmm0, %xmm1, %xmm2 +vfnmsub213sd (%rax), %xmm1, %xmm2 + +vfnmsub231sd %xmm0, %xmm1, %xmm2 +vfnmsub231sd (%rax), %xmm1, %xmm2 + +vfnmsub132ss %xmm0, %xmm1, %xmm2 +vfnmsub132ss (%rax), %xmm1, %xmm2 + +vfnmsub213ss %xmm0, %xmm1, %xmm2 +vfnmsub213ss (%rax), %xmm1, %xmm2 + +vfnmsub231ss %xmm0, %xmm1, %xmm2 +vfnmsub231ss (%rax), %xmm1, %xmm2 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 4 1.00 vfmadd132pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfmadd132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfmadd132pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vfmadd132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vfmadd213pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfmadd213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfmadd213pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vfmadd213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vfmadd231pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfmadd231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfmadd231pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vfmadd231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vfmadd132ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfmadd132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfmadd132ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vfmadd132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vfmadd213ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfmadd213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfmadd213ps %ymm0, %ymm1, %ymm2 
+# CHECK-NEXT: 1 11 1.00 * vfmadd213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vfmadd231ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfmadd231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfmadd231ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vfmadd231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vfmadd132sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfmadd132sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfmadd213sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfmadd213sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfmadd231sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfmadd231sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfmadd132ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfmadd132ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfmadd213ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfmadd213ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfmadd231ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfmadd231ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfmaddsub132pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfmaddsub132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfmaddsub132pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vfmaddsub132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vfmaddsub213pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfmaddsub213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfmaddsub213pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vfmaddsub213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vfmaddsub231pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfmaddsub231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfmaddsub231pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vfmaddsub231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vfmaddsub132ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfmaddsub132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfmaddsub132ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vfmaddsub132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 
vfmaddsub213ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfmaddsub213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfmaddsub213ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vfmaddsub213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vfmaddsub231ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfmaddsub231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfmaddsub231ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vfmaddsub231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vfmsub132pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfmsub132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfmsub132pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vfmsub132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vfmsub213pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfmsub213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfmsub213pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vfmsub213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vfmsub231pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfmsub231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfmsub231pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vfmsub231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vfmsub132ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfmsub132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfmsub132ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vfmsub132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vfmsub213ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfmsub213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfmsub213ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vfmsub213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vfmsub231ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfmsub231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfmsub231ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vfmsub231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vfmsub132sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfmsub132sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 
vfmsub213sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfmsub213sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfmsub231sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfmsub231sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfmsub132ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfmsub132ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfmsub213ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfmsub213ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfmsub231ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfmsub231ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfmsubadd132pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfmsubadd132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfmsubadd132pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vfmsubadd132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vfmsubadd213pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfmsubadd213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfmsubadd213pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vfmsubadd213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vfmsubadd231pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfmsubadd231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfmsubadd231pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vfmsubadd231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vfmsubadd132ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfmsubadd132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfmsubadd132ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vfmsubadd132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vfmsubadd213ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfmsubadd213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfmsubadd213ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vfmsubadd213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vfmsubadd231ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfmsubadd231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfmsubadd231ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vfmsubadd231ps 
(%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vfnmadd132pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfnmadd132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfnmadd132pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vfnmadd132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vfnmadd213pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfnmadd213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfnmadd213pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vfnmadd213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vfnmadd231pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfnmadd231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfnmadd231pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vfnmadd231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vfnmadd132ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfnmadd132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfnmadd132ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vfnmadd132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vfnmadd213ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfnmadd213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfnmadd213ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vfnmadd213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vfnmadd231ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfnmadd231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfnmadd231ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vfnmadd231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vfnmadd132sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfnmadd132sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfnmadd213sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfnmadd213sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfnmadd231sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfnmadd231sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfnmadd132ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfnmadd132ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfnmadd213ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * 
vfnmadd213ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfnmadd231ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfnmadd231ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfnmsub132pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfnmsub132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfnmsub132pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vfnmsub132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vfnmsub213pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfnmsub213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfnmsub213pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vfnmsub213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vfnmsub231pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfnmsub231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfnmsub231pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vfnmsub231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vfnmsub132ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfnmsub132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfnmsub132ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vfnmsub132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vfnmsub213ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfnmsub213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfnmsub213ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vfnmsub213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vfnmsub231ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfnmsub231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfnmsub231ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 11 1.00 * vfnmsub231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vfnmsub132sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfnmsub132sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfnmsub213sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfnmsub213sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfnmsub231sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfnmsub231sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfnmsub132ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 
1.00 * vfnmsub132ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfnmsub213ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfnmsub213ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 1.00 vfnmsub231ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 11 1.00 * vfnmsub231ss (%rax), %xmm1, %xmm2 + +# CHECK: Resources: +# CHECK-NEXT: [0] - Zn3AGU0 +# CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: [8] - Zn3FPP0 +# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: [14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# CHECK-NEXT: [15.0] - Zn3Load +# CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: [16.0] - Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: - - - - - - - - 192.00 192.00 - - 48.00 48.00 - 32.00 32.00 32.00 32.00 32.00 32.00 - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmadd132pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmadd132pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - 
- vfmadd213pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmadd213pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmadd231pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmadd231pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmadd132ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmadd132ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmadd213ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmadd213ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmadd231ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - 
- - - - - vfmadd231ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmadd132sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmadd213sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmadd231sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmadd132ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd132ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmadd213ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd213ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmadd231ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmadd231ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmaddsub132pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmaddsub132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmaddsub132pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmaddsub132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 
1.00 - - - - - - - - - - - - - vfmaddsub213pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmaddsub213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmaddsub213pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmaddsub213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmaddsub231pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmaddsub231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmaddsub231pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmaddsub231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmaddsub132ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmaddsub132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmaddsub132ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmaddsub132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmaddsub213ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmaddsub213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmaddsub213ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmaddsub213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmaddsub231ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - 
vfmaddsub231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmaddsub231ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmaddsub231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmsub132pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmsub132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmsub132pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmsub132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmsub213pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmsub213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmsub213pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmsub213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmsub231pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmsub231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmsub231pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmsub231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmsub132ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmsub132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmsub132ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 
0.33 0.33 0.33 - - vfmsub132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmsub213ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmsub213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmsub213ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmsub213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmsub231ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmsub231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmsub231ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmsub231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmsub132sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmsub132sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmsub213sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmsub213sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmsub231sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmsub231sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmsub132ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmsub132ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmsub213ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 
0.33 0.33 0.33 0.33 0.33 - - vfmsub213ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmsub231ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmsub231ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmsubadd132pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmsubadd132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmsubadd132pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmsubadd132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmsubadd213pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmsubadd213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmsubadd213pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmsubadd213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmsubadd231pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmsubadd231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmsubadd231pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmsubadd231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmsubadd132ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmsubadd132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmsubadd132ps %ymm0, %ymm1, %ymm2 +# 
CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmsubadd132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmsubadd213ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmsubadd213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmsubadd213ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmsubadd213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmsubadd231ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmsubadd231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfmsubadd231ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfmsubadd231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfnmadd132pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfnmadd132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfnmadd132pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfnmadd132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfnmadd213pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfnmadd213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfnmadd213pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfnmadd213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - 
- - - - vfnmadd231pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfnmadd231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfnmadd231pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfnmadd231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfnmadd132ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfnmadd132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfnmadd132ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfnmadd132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfnmadd213ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfnmadd213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfnmadd213ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfnmadd213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfnmadd231ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfnmadd231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfnmadd231ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfnmadd231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfnmadd132sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfnmadd132sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 
1.00 1.00 - - - - - - - - - - - - - vfnmadd213sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfnmadd213sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfnmadd231sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfnmadd231sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfnmadd132ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfnmadd132ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfnmadd213ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfnmadd213ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfnmadd231ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfnmadd231ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfnmsub132pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfnmsub132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfnmsub132pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfnmsub132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfnmsub213pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfnmsub213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfnmsub213pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfnmsub213pd (%rax), %ymm1, %ymm2 +# 
CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfnmsub231pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfnmsub231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfnmsub231pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfnmsub231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfnmsub132ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfnmsub132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfnmsub132ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfnmsub132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfnmsub213ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfnmsub213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfnmsub213ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfnmsub213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfnmsub231ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfnmsub231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfnmsub231ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfnmsub231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfnmsub132sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - 
vfnmsub132sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfnmsub213sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfnmsub213sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfnmsub231sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfnmsub231sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfnmsub132ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfnmsub132ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfnmsub213ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfnmsub213ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - vfnmsub231ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vfnmsub231ss (%rax), %xmm1, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-fsgsbase.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-fsgsbase.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-fsgsbase.s @@ -0,0 +1,72 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -instruction-tables < %s | FileCheck %s + +rdfsbase %eax +rdfsbase %rax + +rdgsbase %eax +rdgsbase %rax + +wrfsbase %edi +wrfsbase %rdi + +wrgsbase %edi +wrgsbase %rdi + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 100 100 
25.00 * * U rdfsbasel %eax +# CHECK-NEXT: 100 100 25.00 * * U rdfsbaseq %rax +# CHECK-NEXT: 100 100 25.00 * * U rdgsbasel %eax +# CHECK-NEXT: 100 100 25.00 * * U rdgsbaseq %rax +# CHECK-NEXT: 100 100 25.00 * * U wrfsbasel %edi +# CHECK-NEXT: 100 100 25.00 * * U wrfsbaseq %rdi +# CHECK-NEXT: 100 100 25.00 * * U wrgsbasel %edi +# CHECK-NEXT: 100 100 25.00 * * U wrgsbaseq %rdi + +# CHECK: Resources: +# CHECK-NEXT: [0] - Zn3AGU0 +# CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: [8] - Zn3FPP0 +# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: [14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# CHECK-NEXT: [15.0] - Zn3Load +# CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: [16.0] - Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: - - - 200.00 200.00 200.00 200.00 - - - - - - - - - - - - - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - rdfsbasel %eax +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - rdfsbaseq %rax +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - rdgsbasel %eax +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - rdgsbaseq %rax +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - 
- - - - - - wrfsbasel %edi +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - wrfsbaseq %rdi +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - wrgsbasel %edi +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - wrgsbaseq %rdi diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-lea.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-lea.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-lea.s @@ -0,0 +1,452 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -instruction-tables < %s | FileCheck %s + +lea 0(), %cx +lea 0(), %ecx +lea 0(), %rcx +lea (%eax), %cx +lea (%eax), %ecx +lea (%eax), %rcx +lea (%rax), %cx +lea (%rax), %ecx +lea (%rax), %rcx +lea (, %ebx), %cx +lea (, %ebx), %ecx +lea (, %ebx), %rcx +lea (, %rbx), %cx +lea (, %rbx), %ecx +lea (, %rbx), %rcx +lea (, %ebx, 1), %cx +lea (, %ebx, 1), %ecx +lea (, %ebx, 1), %rcx +lea (, %rbx, 1), %cx +lea (, %rbx, 1), %ecx +lea (, %rbx, 1), %rcx +lea (, %ebx, 2), %cx +lea (, %ebx, 2), %ecx +lea (, %ebx, 2), %rcx +lea (, %rbx, 2), %cx +lea (, %rbx, 2), %ecx +lea (, %rbx, 2), %rcx +lea (%eax, %ebx), %cx +lea (%eax, %ebx), %ecx +lea (%eax, %ebx), %rcx +lea (%rax, %rbx), %cx +lea (%rax, %rbx), %ecx +lea (%rax, %rbx), %rcx +lea (%eax, %ebx, 1), %cx +lea (%eax, %ebx, 1), %ecx +lea (%eax, %ebx, 1), %rcx +lea (%rax, %rbx, 1), %cx +lea (%rax, %rbx, 1), %ecx +lea (%rax, %rbx, 1), %rcx +lea (%eax, %ebx, 2), %cx +lea (%eax, %ebx, 2), %ecx +lea (%eax, %ebx, 2), %rcx +lea (%rax, %rbx, 2), %cx +lea (%rax, %rbx, 2), %ecx +lea (%rax, %rbx, 2), %rcx + +lea -16(), %cx +lea -16(), %ecx +lea -16(), %rcx +lea -16(%eax), %cx +lea -16(%eax), %ecx +lea -16(%eax), %rcx +lea -16(%rax), %cx +lea -16(%rax), %ecx +lea -16(%rax), %rcx +lea -16(, %ebx), %cx +lea -16(, %ebx), %ecx +lea -16(, %ebx), %rcx +lea -16(, %rbx), %cx +lea -16(, 
%rbx), %ecx +lea -16(, %rbx), %rcx +lea -16(, %ebx, 1), %cx +lea -16(, %ebx, 1), %ecx +lea -16(, %ebx, 1), %rcx +lea -16(, %rbx, 1), %cx +lea -16(, %rbx, 1), %ecx +lea -16(, %rbx, 1), %rcx +lea -16(, %ebx, 2), %cx +lea -16(, %ebx, 2), %ecx +lea -16(, %ebx, 2), %rcx +lea -16(, %rbx, 2), %cx +lea -16(, %rbx, 2), %ecx +lea -16(, %rbx, 2), %rcx +lea -16(%eax, %ebx), %cx +lea -16(%eax, %ebx), %ecx +lea -16(%eax, %ebx), %rcx +lea -16(%rax, %rbx), %cx +lea -16(%rax, %rbx), %ecx +lea -16(%rax, %rbx), %rcx +lea -16(%eax, %ebx, 1), %cx +lea -16(%eax, %ebx, 1), %ecx +lea -16(%eax, %ebx, 1), %rcx +lea -16(%rax, %rbx, 1), %cx +lea -16(%rax, %rbx, 1), %ecx +lea -16(%rax, %rbx, 1), %rcx +lea -16(%eax, %ebx, 2), %cx +lea -16(%eax, %ebx, 2), %ecx +lea -16(%eax, %ebx, 2), %rcx +lea -16(%rax, %rbx, 2), %cx +lea -16(%rax, %rbx, 2), %ecx +lea -16(%rax, %rbx, 2), %rcx + +lea 1024(), %cx +lea 1024(), %ecx +lea 1024(), %rcx +lea 1024(%eax), %cx +lea 1024(%eax), %ecx +lea 1024(%eax), %rcx +lea 1024(%rax), %cx +lea 1024(%rax), %ecx +lea 1024(%rax), %rcx +lea 1024(, %ebx), %cx +lea 1024(, %ebx), %ecx +lea 1024(, %ebx), %rcx +lea 1024(, %rbx), %cx +lea 1024(, %rbx), %ecx +lea 1024(, %rbx), %rcx +lea 1024(, %ebx, 1), %cx +lea 1024(, %ebx, 1), %ecx +lea 1024(, %ebx, 1), %rcx +lea 1024(, %rbx, 1), %cx +lea 1024(, %rbx, 1), %ecx +lea 1024(, %rbx, 1), %rcx +lea 1024(, %ebx, 2), %cx +lea 1024(, %ebx, 2), %ecx +lea 1024(, %ebx, 2), %rcx +lea 1024(, %rbx, 2), %cx +lea 1024(, %rbx, 2), %ecx +lea 1024(, %rbx, 2), %rcx +lea 1024(%eax, %ebx), %cx +lea 1024(%eax, %ebx), %ecx +lea 1024(%eax, %ebx), %rcx +lea 1024(%rax, %rbx), %cx +lea 1024(%rax, %rbx), %ecx +lea 1024(%rax, %rbx), %rcx +lea 1024(%eax, %ebx, 1), %cx +lea 1024(%eax, %ebx, 1), %ecx +lea 1024(%eax, %ebx, 1), %rcx +lea 1024(%rax, %rbx, 1), %cx +lea 1024(%rax, %rbx, 1), %ecx +lea 1024(%rax, %rbx, 1), %rcx +lea 1024(%eax, %ebx, 2), %cx +lea 1024(%eax, %ebx, 2), %ecx +lea 1024(%eax, %ebx, 2), %rcx +lea 1024(%rax, %rbx, 2), %cx +lea 1024(%rax, %rbx, 
2), %ecx +lea 1024(%rax, %rbx, 2), %rcx + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 2 2 1.00 leaw 0, %cx +# CHECK-NEXT: 1 1 0.33 leal 0, %ecx +# CHECK-NEXT: 1 1 0.33 leaq 0, %rcx +# CHECK-NEXT: 2 2 1.00 leaw (%eax), %cx +# CHECK-NEXT: 1 1 0.33 leal (%eax), %ecx +# CHECK-NEXT: 1 1 0.33 leaq (%eax), %rcx +# CHECK-NEXT: 2 2 1.00 leaw (%rax), %cx +# CHECK-NEXT: 1 1 0.33 leal (%rax), %ecx +# CHECK-NEXT: 1 1 0.33 leaq (%rax), %rcx +# CHECK-NEXT: 2 2 1.00 leaw (,%ebx), %cx +# CHECK-NEXT: 1 1 0.33 leal (,%ebx), %ecx +# CHECK-NEXT: 1 1 0.33 leaq (,%ebx), %rcx +# CHECK-NEXT: 2 2 1.00 leaw (,%rbx), %cx +# CHECK-NEXT: 1 1 0.33 leal (,%rbx), %ecx +# CHECK-NEXT: 1 1 0.33 leaq (,%rbx), %rcx +# CHECK-NEXT: 2 2 1.00 leaw (,%ebx), %cx +# CHECK-NEXT: 1 1 0.33 leal (,%ebx), %ecx +# CHECK-NEXT: 1 1 0.33 leaq (,%ebx), %rcx +# CHECK-NEXT: 2 2 1.00 leaw (,%rbx), %cx +# CHECK-NEXT: 1 1 0.33 leal (,%rbx), %ecx +# CHECK-NEXT: 1 1 0.33 leaq (,%rbx), %rcx +# CHECK-NEXT: 2 2 1.00 leaw (,%ebx,2), %cx +# CHECK-NEXT: 2 2 0.25 leal (,%ebx,2), %ecx +# CHECK-NEXT: 2 2 0.25 leaq (,%ebx,2), %rcx +# CHECK-NEXT: 2 2 1.00 leaw (,%rbx,2), %cx +# CHECK-NEXT: 2 2 0.25 leal (,%rbx,2), %ecx +# CHECK-NEXT: 2 2 0.25 leaq (,%rbx,2), %rcx +# CHECK-NEXT: 2 2 1.00 leaw (%eax,%ebx), %cx +# CHECK-NEXT: 1 1 0.33 leal (%eax,%ebx), %ecx +# CHECK-NEXT: 1 1 0.33 leaq (%eax,%ebx), %rcx +# CHECK-NEXT: 2 2 1.00 leaw (%rax,%rbx), %cx +# CHECK-NEXT: 1 1 0.33 leal (%rax,%rbx), %ecx +# CHECK-NEXT: 1 1 0.33 leaq (%rax,%rbx), %rcx +# CHECK-NEXT: 2 2 1.00 leaw (%eax,%ebx), %cx +# CHECK-NEXT: 1 1 0.33 leal (%eax,%ebx), %ecx +# CHECK-NEXT: 1 1 0.33 leaq (%eax,%ebx), %rcx +# CHECK-NEXT: 2 2 1.00 leaw (%rax,%rbx), %cx +# CHECK-NEXT: 1 1 0.33 leal (%rax,%rbx), %ecx +# CHECK-NEXT: 1 1 0.33 leaq (%rax,%rbx), 
%rcx +# CHECK-NEXT: 2 2 1.00 leaw (%eax,%ebx,2), %cx +# CHECK-NEXT: 2 2 0.25 leal (%eax,%ebx,2), %ecx +# CHECK-NEXT: 2 2 0.25 leaq (%eax,%ebx,2), %rcx +# CHECK-NEXT: 2 2 1.00 leaw (%rax,%rbx,2), %cx +# CHECK-NEXT: 2 2 0.25 leal (%rax,%rbx,2), %ecx +# CHECK-NEXT: 2 2 0.25 leaq (%rax,%rbx,2), %rcx +# CHECK-NEXT: 2 2 1.00 leaw -16, %cx +# CHECK-NEXT: 1 1 0.33 leal -16, %ecx +# CHECK-NEXT: 1 1 0.33 leaq -16, %rcx +# CHECK-NEXT: 2 2 1.00 leaw -16(%eax), %cx +# CHECK-NEXT: 1 1 0.33 leal -16(%eax), %ecx +# CHECK-NEXT: 1 1 0.33 leaq -16(%eax), %rcx +# CHECK-NEXT: 2 2 1.00 leaw -16(%rax), %cx +# CHECK-NEXT: 1 1 0.33 leal -16(%rax), %ecx +# CHECK-NEXT: 1 1 0.33 leaq -16(%rax), %rcx +# CHECK-NEXT: 2 2 1.00 leaw -16(,%ebx), %cx +# CHECK-NEXT: 1 1 0.33 leal -16(,%ebx), %ecx +# CHECK-NEXT: 1 1 0.33 leaq -16(,%ebx), %rcx +# CHECK-NEXT: 2 2 1.00 leaw -16(,%rbx), %cx +# CHECK-NEXT: 1 1 0.33 leal -16(,%rbx), %ecx +# CHECK-NEXT: 1 1 0.33 leaq -16(,%rbx), %rcx +# CHECK-NEXT: 2 2 1.00 leaw -16(,%ebx), %cx +# CHECK-NEXT: 1 1 0.33 leal -16(,%ebx), %ecx +# CHECK-NEXT: 1 1 0.33 leaq -16(,%ebx), %rcx +# CHECK-NEXT: 2 2 1.00 leaw -16(,%rbx), %cx +# CHECK-NEXT: 1 1 0.33 leal -16(,%rbx), %ecx +# CHECK-NEXT: 1 1 0.33 leaq -16(,%rbx), %rcx +# CHECK-NEXT: 2 2 1.00 leaw -16(,%ebx,2), %cx +# CHECK-NEXT: 2 2 0.25 leal -16(,%ebx,2), %ecx +# CHECK-NEXT: 2 2 0.25 leaq -16(,%ebx,2), %rcx +# CHECK-NEXT: 2 2 1.00 leaw -16(,%rbx,2), %cx +# CHECK-NEXT: 2 2 0.25 leal -16(,%rbx,2), %ecx +# CHECK-NEXT: 2 2 0.25 leaq -16(,%rbx,2), %rcx +# CHECK-NEXT: 2 2 1.00 leaw -16(%eax,%ebx), %cx +# CHECK-NEXT: 2 2 0.25 leal -16(%eax,%ebx), %ecx +# CHECK-NEXT: 2 2 0.25 leaq -16(%eax,%ebx), %rcx +# CHECK-NEXT: 2 2 1.00 leaw -16(%rax,%rbx), %cx +# CHECK-NEXT: 2 2 0.25 leal -16(%rax,%rbx), %ecx +# CHECK-NEXT: 2 2 0.25 leaq -16(%rax,%rbx), %rcx +# CHECK-NEXT: 2 2 1.00 leaw -16(%eax,%ebx), %cx +# CHECK-NEXT: 2 2 0.25 leal -16(%eax,%ebx), %ecx +# CHECK-NEXT: 2 2 0.25 leaq -16(%eax,%ebx), %rcx +# CHECK-NEXT: 2 2 1.00 leaw 
-16(%rax,%rbx), %cx +# CHECK-NEXT: 2 2 0.25 leal -16(%rax,%rbx), %ecx +# CHECK-NEXT: 2 2 0.25 leaq -16(%rax,%rbx), %rcx +# CHECK-NEXT: 2 2 1.00 leaw -16(%eax,%ebx,2), %cx +# CHECK-NEXT: 2 2 0.25 leal -16(%eax,%ebx,2), %ecx +# CHECK-NEXT: 2 2 0.25 leaq -16(%eax,%ebx,2), %rcx +# CHECK-NEXT: 2 2 1.00 leaw -16(%rax,%rbx,2), %cx +# CHECK-NEXT: 2 2 0.25 leal -16(%rax,%rbx,2), %ecx +# CHECK-NEXT: 2 2 0.25 leaq -16(%rax,%rbx,2), %rcx +# CHECK-NEXT: 2 2 1.00 leaw 1024, %cx +# CHECK-NEXT: 1 1 0.33 leal 1024, %ecx +# CHECK-NEXT: 1 1 0.33 leaq 1024, %rcx +# CHECK-NEXT: 2 2 1.00 leaw 1024(%eax), %cx +# CHECK-NEXT: 1 1 0.33 leal 1024(%eax), %ecx +# CHECK-NEXT: 1 1 0.33 leaq 1024(%eax), %rcx +# CHECK-NEXT: 2 2 1.00 leaw 1024(%rax), %cx +# CHECK-NEXT: 1 1 0.33 leal 1024(%rax), %ecx +# CHECK-NEXT: 1 1 0.33 leaq 1024(%rax), %rcx +# CHECK-NEXT: 2 2 1.00 leaw 1024(,%ebx), %cx +# CHECK-NEXT: 1 1 0.33 leal 1024(,%ebx), %ecx +# CHECK-NEXT: 1 1 0.33 leaq 1024(,%ebx), %rcx +# CHECK-NEXT: 2 2 1.00 leaw 1024(,%rbx), %cx +# CHECK-NEXT: 1 1 0.33 leal 1024(,%rbx), %ecx +# CHECK-NEXT: 1 1 0.33 leaq 1024(,%rbx), %rcx +# CHECK-NEXT: 2 2 1.00 leaw 1024(,%ebx), %cx +# CHECK-NEXT: 1 1 0.33 leal 1024(,%ebx), %ecx +# CHECK-NEXT: 1 1 0.33 leaq 1024(,%ebx), %rcx +# CHECK-NEXT: 2 2 1.00 leaw 1024(,%rbx), %cx +# CHECK-NEXT: 1 1 0.33 leal 1024(,%rbx), %ecx +# CHECK-NEXT: 1 1 0.33 leaq 1024(,%rbx), %rcx +# CHECK-NEXT: 2 2 1.00 leaw 1024(,%ebx,2), %cx +# CHECK-NEXT: 2 2 0.25 leal 1024(,%ebx,2), %ecx +# CHECK-NEXT: 2 2 0.25 leaq 1024(,%ebx,2), %rcx +# CHECK-NEXT: 2 2 1.00 leaw 1024(,%rbx,2), %cx +# CHECK-NEXT: 2 2 0.25 leal 1024(,%rbx,2), %ecx +# CHECK-NEXT: 2 2 0.25 leaq 1024(,%rbx,2), %rcx +# CHECK-NEXT: 2 2 1.00 leaw 1024(%eax,%ebx), %cx +# CHECK-NEXT: 2 2 0.25 leal 1024(%eax,%ebx), %ecx +# CHECK-NEXT: 2 2 0.25 leaq 1024(%eax,%ebx), %rcx +# CHECK-NEXT: 2 2 1.00 leaw 1024(%rax,%rbx), %cx +# CHECK-NEXT: 2 2 0.25 leal 1024(%rax,%rbx), %ecx +# CHECK-NEXT: 2 2 0.25 leaq 1024(%rax,%rbx), %rcx +# CHECK-NEXT: 2 2 
1.00 leaw 1024(%eax,%ebx), %cx +# CHECK-NEXT: 2 2 0.25 leal 1024(%eax,%ebx), %ecx +# CHECK-NEXT: 2 2 0.25 leaq 1024(%eax,%ebx), %rcx +# CHECK-NEXT: 2 2 1.00 leaw 1024(%rax,%rbx), %cx +# CHECK-NEXT: 2 2 0.25 leal 1024(%rax,%rbx), %ecx +# CHECK-NEXT: 2 2 0.25 leaq 1024(%rax,%rbx), %rcx +# CHECK-NEXT: 2 2 1.00 leaw 1024(%eax,%ebx,2), %cx +# CHECK-NEXT: 2 2 0.25 leal 1024(%eax,%ebx,2), %ecx +# CHECK-NEXT: 2 2 0.25 leaq 1024(%eax,%ebx,2), %rcx +# CHECK-NEXT: 2 2 1.00 leaw 1024(%rax,%rbx,2), %cx +# CHECK-NEXT: 2 2 0.25 leal 1024(%rax,%rbx,2), %ecx +# CHECK-NEXT: 2 2 0.25 leaq 1024(%rax,%rbx,2), %rcx + +# CHECK: Resources: +# CHECK-NEXT: [0] - Zn3AGU0 +# CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: [8] - Zn3FPP0 +# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: [14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# CHECK-NEXT: [15.0] - Zn3Load +# CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: [16.0] - Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: 16.67 16.67 16.67 55.00 55.00 55.00 55.00 - - - - - - - - - - - - - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw 0, %cx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leal 0, %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - 
- - - - - - - - - - - - - - - - - - - leaq 0, %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw (%eax), %cx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leal (%eax), %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leaq (%eax), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw (%rax), %cx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leal (%rax), %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leaq (%rax), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw (,%ebx), %cx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leal (,%ebx), %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leaq (,%ebx), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw (,%rbx), %cx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leal (,%rbx), %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leaq (,%rbx), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw (,%ebx), %cx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leal (,%ebx), %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leaq (,%ebx), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw (,%rbx), %cx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leal (,%rbx), %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leaq (,%rbx), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw (,%ebx,2), %cx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leal (,%ebx,2), %ecx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leaq (,%ebx,2), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - 
- - - - - - - leaw (,%rbx,2), %cx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leal (,%rbx,2), %ecx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leaq (,%rbx,2), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw (%eax,%ebx), %cx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leal (%eax,%ebx), %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leaq (%eax,%ebx), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw (%rax,%rbx), %cx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leal (%rax,%rbx), %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leaq (%rax,%rbx), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw (%eax,%ebx), %cx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leal (%eax,%ebx), %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leaq (%eax,%ebx), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw (%rax,%rbx), %cx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leal (%rax,%rbx), %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leaq (%rax,%rbx), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw (%eax,%ebx,2), %cx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leal (%eax,%ebx,2), %ecx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leaq (%eax,%ebx,2), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw (%rax,%rbx,2), %cx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leal (%rax,%rbx,2), %ecx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leaq (%rax,%rbx,2), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - 
- - - leaw -16, %cx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leal -16, %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leaq -16, %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw -16(%eax), %cx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leal -16(%eax), %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leaq -16(%eax), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw -16(%rax), %cx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leal -16(%rax), %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leaq -16(%rax), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw -16(,%ebx), %cx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leal -16(,%ebx), %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leaq -16(,%ebx), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw -16(,%rbx), %cx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leal -16(,%rbx), %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leaq -16(,%rbx), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw -16(,%ebx), %cx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leal -16(,%ebx), %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leaq -16(,%ebx), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw -16(,%rbx), %cx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leal -16(,%rbx), %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leaq -16(,%rbx), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw -16(,%ebx,2), %cx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - 
- - - - - - - - - - - leal -16(,%ebx,2), %ecx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leaq -16(,%ebx,2), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw -16(,%rbx,2), %cx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leal -16(,%rbx,2), %ecx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leaq -16(,%rbx,2), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw -16(%eax,%ebx), %cx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leal -16(%eax,%ebx), %ecx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leaq -16(%eax,%ebx), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw -16(%rax,%rbx), %cx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leal -16(%rax,%rbx), %ecx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leaq -16(%rax,%rbx), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw -16(%eax,%ebx), %cx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leal -16(%eax,%ebx), %ecx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leaq -16(%eax,%ebx), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw -16(%rax,%rbx), %cx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leal -16(%rax,%rbx), %ecx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leaq -16(%rax,%rbx), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw -16(%eax,%ebx,2), %cx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leal -16(%eax,%ebx,2), %ecx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leaq -16(%eax,%ebx,2), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw 
-16(%rax,%rbx,2), %cx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leal -16(%rax,%rbx,2), %ecx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leaq -16(%rax,%rbx,2), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw 1024, %cx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leal 1024, %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leaq 1024, %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw 1024(%eax), %cx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leal 1024(%eax), %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leaq 1024(%eax), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw 1024(%rax), %cx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leal 1024(%rax), %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leaq 1024(%rax), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw 1024(,%ebx), %cx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leal 1024(,%ebx), %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leaq 1024(,%ebx), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw 1024(,%rbx), %cx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leal 1024(,%rbx), %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leaq 1024(,%rbx), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw 1024(,%ebx), %cx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leal 1024(,%ebx), %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leaq 1024(,%ebx), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw 1024(,%rbx), %cx +# CHECK-NEXT: 0.33 
0.33 0.33 - - - - - - - - - - - - - - - - - - - - leal 1024(,%rbx), %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - leaq 1024(,%rbx), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw 1024(,%ebx,2), %cx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leal 1024(,%ebx,2), %ecx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leaq 1024(,%ebx,2), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw 1024(,%rbx,2), %cx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leal 1024(,%rbx,2), %ecx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leaq 1024(,%rbx,2), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw 1024(%eax,%ebx), %cx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leal 1024(%eax,%ebx), %ecx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leaq 1024(%eax,%ebx), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw 1024(%rax,%rbx), %cx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leal 1024(%rax,%rbx), %ecx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leaq 1024(%rax,%rbx), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw 1024(%eax,%ebx), %cx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leal 1024(%eax,%ebx), %ecx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leaq 1024(%eax,%ebx), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw 1024(%rax,%rbx), %cx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leal 1024(%rax,%rbx), %ecx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leaq 1024(%rax,%rbx), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - 
- - - - - - - - - - - leaw 1024(%eax,%ebx,2), %cx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leal 1024(%eax,%ebx,2), %ecx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leaq 1024(%eax,%ebx,2), %rcx +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - leaw 1024(%rax,%rbx,2), %cx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leal 1024(%rax,%rbx,2), %ecx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leaq 1024(%rax,%rbx,2), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-lzcnt.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-lzcnt.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-lzcnt.s @@ -0,0 +1,65 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -instruction-tables < %s | FileCheck %s + +lzcntw %cx, %cx +lzcntw (%rax), %cx + +lzcntl %eax, %ecx +lzcntl (%rax), %ecx + +lzcntq %rax, %rcx +lzcntq (%rax), %rcx + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 1.00 lzcntw %cx, %cx +# CHECK-NEXT: 1 5 0.33 * lzcntw (%rax), %cx +# CHECK-NEXT: 1 1 0.25 lzcntl %eax, %ecx +# CHECK-NEXT: 1 5 0.33 * lzcntl (%rax), %ecx +# CHECK-NEXT: 1 1 0.25 lzcntq %rax, %rcx +# CHECK-NEXT: 1 5 0.33 * lzcntq (%rax), %rcx + +# CHECK: Resources: +# CHECK-NEXT: [0] - Zn3AGU0 +# CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: [8] - Zn3FPP0 +# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# 
CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: [14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# CHECK-NEXT: [15.0] - Zn3Load +# CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: [16.0] - Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: 1.00 1.00 1.00 2.25 2.25 2.25 2.25 - - - - - - - - 1.00 1.00 1.00 1.00 1.00 1.00 - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - lzcntw %cx, %cx +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - lzcntw (%rax), %cx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - lzcntl %eax, %ecx +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - lzcntl (%rax), %ecx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - lzcntq %rax, %rcx +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - lzcntq (%rax), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-mmx.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-mmx.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-mmx.s @@ -0,0 +1,408 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -instruction-tables < %s | FileCheck %s + +emms + +movd %eax, %mm2 +movd (%rax), %mm2 + +movd %mm0, %ecx +movd %mm0, (%rax) + +movq %rax, %mm2 +movq (%rax), %mm2 + 
+movq %mm0, %rcx +movq %mm0, (%rax) + +packsswb %mm0, %mm2 +packsswb (%rax), %mm2 + +packssdw %mm0, %mm2 +packssdw (%rax), %mm2 + +packuswb %mm0, %mm2 +packuswb (%rax), %mm2 + +paddb %mm0, %mm2 +paddb (%rax), %mm2 + +paddd %mm0, %mm2 +paddd (%rax), %mm2 + +paddsb %mm0, %mm2 +paddsb (%rax), %mm2 + +paddsw %mm0, %mm2 +paddsw (%rax), %mm2 + +paddusb %mm0, %mm2 +paddusb (%rax), %mm2 + +paddusw %mm0, %mm2 +paddusw (%rax), %mm2 + +paddw %mm0, %mm2 +paddw (%rax), %mm2 + +pand %mm0, %mm2 +pand (%rax), %mm2 + +pandn %mm0, %mm2 +pandn (%rax), %mm2 + +pcmpeqb %mm0, %mm2 +pcmpeqb (%rax), %mm2 + +pcmpeqd %mm0, %mm2 +pcmpeqd (%rax), %mm2 + +pcmpeqw %mm0, %mm2 +pcmpeqw (%rax), %mm2 + +pcmpgtb %mm0, %mm2 +pcmpgtb (%rax), %mm2 + +pcmpgtd %mm0, %mm2 +pcmpgtd (%rax), %mm2 + +pcmpgtw %mm0, %mm2 +pcmpgtw (%rax), %mm2 + +pmaddwd %mm0, %mm2 +pmaddwd (%rax), %mm2 + +pmulhw %mm0, %mm2 +pmulhw (%rax), %mm2 + +pmullw %mm0, %mm2 +pmullw (%rax), %mm2 + +por %mm0, %mm2 +por (%rax), %mm2 + +pslld $1, %mm2 +pslld %mm0, %mm2 +pslld (%rax), %mm2 + +psllq $1, %mm2 +psllq %mm0, %mm2 +psllq (%rax), %mm2 + +psllw $1, %mm2 +psllw %mm0, %mm2 +psllw (%rax), %mm2 + +psrad $1, %mm2 +psrad %mm0, %mm2 +psrad (%rax), %mm2 + +psraw $1, %mm2 +psraw %mm0, %mm2 +psraw (%rax), %mm2 + +psrld $1, %mm2 +psrld %mm0, %mm2 +psrld (%rax), %mm2 + +psrlq $1, %mm2 +psrlq %mm0, %mm2 +psrlq (%rax), %mm2 + +psrlw $1, %mm2 +psrlw %mm0, %mm2 +psrlw (%rax), %mm2 + +psubb %mm0, %mm2 +psubb (%rax), %mm2 + +psubd %mm0, %mm2 +psubd (%rax), %mm2 + +psubsb %mm0, %mm2 +psubsb (%rax), %mm2 + +psubsw %mm0, %mm2 +psubsw (%rax), %mm2 + +psubusb %mm0, %mm2 +psubusb (%rax), %mm2 + +psubusw %mm0, %mm2 +psubusw (%rax), %mm2 + +psubw %mm0, %mm2 +psubw (%rax), %mm2 + +punpckhbw %mm0, %mm2 +punpckhbw (%rax), %mm2 + +punpckhdq %mm0, %mm2 +punpckhdq (%rax), %mm2 + +punpckhwd %mm0, %mm2 +punpckhwd (%rax), %mm2 + +punpcklbw %mm0, %mm2 +punpcklbw (%rax), %mm2 + +punpckldq %mm0, %mm2 +punpckldq (%rax), %mm2 + +punpcklwd %mm0, %mm2 +punpcklwd (%rax), %mm2 
+ +pxor %mm0, %mm2 +pxor (%rax), %mm2 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 2 0.25 * * U emms +# CHECK-NEXT: 2 1 1.00 movd %eax, %mm2 +# CHECK-NEXT: 1 8 0.50 * movd (%rax), %mm2 +# CHECK-NEXT: 1 1 1.00 movd %mm0, %ecx +# CHECK-NEXT: 1 1 1.00 * U movd %mm0, (%rax) +# CHECK-NEXT: 2 1 1.00 movq %rax, %mm2 +# CHECK-NEXT: 1 8 0.50 * movq (%rax), %mm2 +# CHECK-NEXT: 1 1 1.00 movq %mm0, %rcx +# CHECK-NEXT: 1 1 1.00 * movq %mm0, (%rax) +# CHECK-NEXT: 1 1 0.50 packsswb %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * packsswb (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 packssdw %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * packssdw (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 packuswb %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * packuswb (%rax), %mm2 +# CHECK-NEXT: 1 1 0.25 paddb %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * paddb (%rax), %mm2 +# CHECK-NEXT: 1 1 0.25 paddd %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * paddd (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 paddsb %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * paddsb (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 paddsw %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * paddsw (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 paddusb %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * paddusb (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 paddusw %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * paddusw (%rax), %mm2 +# CHECK-NEXT: 1 1 0.25 paddw %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * paddw (%rax), %mm2 +# CHECK-NEXT: 1 1 0.25 pand %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * pand (%rax), %mm2 +# CHECK-NEXT: 1 1 0.25 pandn %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * pandn (%rax), %mm2 +# CHECK-NEXT: 1 1 0.25 pcmpeqb %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * pcmpeqb (%rax), %mm2 +# CHECK-NEXT: 1 1 0.25 pcmpeqd %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * pcmpeqd (%rax), %mm2 +# CHECK-NEXT: 1 1 0.25 pcmpeqw %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * pcmpeqw (%rax), 
%mm2 +# CHECK-NEXT: 1 1 0.25 pcmpgtb %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * pcmpgtb (%rax), %mm2 +# CHECK-NEXT: 1 1 0.25 pcmpgtd %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * pcmpgtd (%rax), %mm2 +# CHECK-NEXT: 1 1 0.25 pcmpgtw %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * pcmpgtw (%rax), %mm2 +# CHECK-NEXT: 1 3 0.50 pmaddwd %mm0, %mm2 +# CHECK-NEXT: 1 10 0.50 * pmaddwd (%rax), %mm2 +# CHECK-NEXT: 1 3 0.50 pmulhw %mm0, %mm2 +# CHECK-NEXT: 1 10 0.50 * pmulhw (%rax), %mm2 +# CHECK-NEXT: 1 3 0.50 pmullw %mm0, %mm2 +# CHECK-NEXT: 1 10 0.50 * pmullw (%rax), %mm2 +# CHECK-NEXT: 1 1 0.25 por %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * por (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 pslld $1, %mm2 +# CHECK-NEXT: 1 1 0.50 pslld %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * pslld (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 psllq $1, %mm2 +# CHECK-NEXT: 1 1 0.50 psllq %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * psllq (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 psllw $1, %mm2 +# CHECK-NEXT: 1 1 0.50 psllw %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * psllw (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 psrad $1, %mm2 +# CHECK-NEXT: 1 1 0.50 psrad %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * psrad (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 psraw $1, %mm2 +# CHECK-NEXT: 1 1 0.50 psraw %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * psraw (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 psrld $1, %mm2 +# CHECK-NEXT: 1 1 0.50 psrld %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * psrld (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 psrlq $1, %mm2 +# CHECK-NEXT: 1 1 0.50 psrlq %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * psrlq (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 psrlw $1, %mm2 +# CHECK-NEXT: 1 1 0.50 psrlw %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * psrlw (%rax), %mm2 +# CHECK-NEXT: 1 1 0.25 psubb %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * psubb (%rax), %mm2 +# CHECK-NEXT: 1 1 0.25 psubd %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * psubd (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 psubsb %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * psubsb (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 psubsw %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * psubsw (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 
psubusb %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * psubusb (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 psubusw %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * psubusw (%rax), %mm2 +# CHECK-NEXT: 1 1 0.25 psubw %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * psubw (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 punpckhbw %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * punpckhbw (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 punpckhdq %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * punpckhdq (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 punpckhwd %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * punpckhwd (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 punpcklbw %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * punpcklbw (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 punpckldq %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * punpckldq (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 punpcklwd %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * punpcklwd (%rax), %mm2 +# CHECK-NEXT: 1 1 0.25 pxor %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * pxor (%rax), %mm2 + +# CHECK: Resources: +# CHECK-NEXT: [0] - Zn3AGU0 +# CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: [8] - Zn3FPP0 +# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: [14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# CHECK-NEXT: [15.0] - Zn3Load +# CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: [16.0] - Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - 19.00 37.00 33.00 15.00 27.00 27.00 2.00 16.00 16.00 16.00 15.33 15.33 15.33 1.00 1.00 + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] 
[2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - emms +# CHECK-NEXT: - - - - - - - - 1.00 1.00 1.00 1.00 0.50 0.50 - - - - - - - - - movd %eax, %mm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - movd (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - movd %mm0, %ecx +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 movd %mm0, (%rax) +# CHECK-NEXT: - - - - - - - - 1.00 1.00 1.00 1.00 0.50 0.50 - - - - - - - - - movq %rax, %mm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - movq (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - movq %mm0, %rcx +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 movq %mm0, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - packsswb %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - packsswb (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - packssdw %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - packssdw (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - packuswb %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - packuswb (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - paddb %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - paddb (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - paddd %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - paddd (%rax), %mm2 +# 
CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - paddsb %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - paddsb (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - paddsw %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - paddsw (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - paddusb %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - paddusb (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - paddusw %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - paddusw (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - paddw %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - paddw (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - pand %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pand (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - pandn %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pandn (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - pcmpeqb %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pcmpeqb (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - pcmpeqd %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pcmpeqd (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - pcmpeqw %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 
0.33 0.33 0.33 0.33 0.33 0.33 - - pcmpeqw (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - pcmpgtb %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pcmpgtb (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - pcmpgtd %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pcmpgtd (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - pcmpgtw %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pcmpgtw (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - pmaddwd %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmaddwd (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - pmulhw %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmulhw (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - pmullw %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmullw (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - por %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - por (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - pslld $1, %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - pslld %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pslld (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - psllq $1, %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - psllq %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 
0.33 0.33 - - psllq (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - psllw $1, %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - psllw %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - psllw (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - psrad $1, %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - psrad %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - psrad (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - psraw $1, %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - psraw %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - psraw (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - psrld $1, %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - psrld %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - psrld (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - psrlq $1, %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - psrlq %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - psrlq (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - psrlw $1, %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - psrlw %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - psrlw (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - psubb %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - psubb (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - psubd %mm0, %mm2 +# CHECK-NEXT: 
- - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - psubd (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - psubsb %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - psubsb (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - psubsw %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - psubsw (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - psubusb %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - psubusb (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - psubusw %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - psubusw (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - psubw %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - psubw (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - punpckhbw %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - punpckhbw (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - punpckhdq %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - punpckhdq (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - punpckhwd %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - punpckhwd (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - punpcklbw %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - punpcklbw (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - 
punpckldq %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - punpckldq (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - punpcklwd %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - punpcklwd (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - pxor %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pxor (%rax), %mm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-movbe.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-movbe.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-movbe.s @@ -0,0 +1,65 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -instruction-tables < %s | FileCheck %s + +movbe %cx, (%rax) +movbe (%rax), %cx + +movbe %ecx, (%rax) +movbe (%rax), %ecx + +movbe %rcx, (%rax) +movbe (%rax), %rcx + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 2 1 1.00 * movbew %cx, (%rax) +# CHECK-NEXT: 1 4 1.00 * movbew (%rax), %cx +# CHECK-NEXT: 2 1 1.00 * movbel %ecx, (%rax) +# CHECK-NEXT: 1 5 0.33 * movbel (%rax), %ecx +# CHECK-NEXT: 2 1 1.00 * movbeq %rcx, (%rax) +# CHECK-NEXT: 1 5 0.33 * movbeq (%rax), %rcx + +# CHECK: Resources: +# CHECK-NEXT: [0] - Zn3AGU0 +# CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: [8] - Zn3FPP0 +# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# 
CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: [14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# CHECK-NEXT: [15.0] - Zn3Load +# CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: [16.0] - Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: 2.00 2.00 2.00 4.50 4.50 4.50 4.50 - - - - - - - - 2.00 2.00 2.00 1.00 1.00 1.00 1.50 1.50 + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: +# CHECK-NEXT: 0.33 0.33 0.33 1.00 1.00 1.00 1.00 - - - - - - - - 0.33 0.33 0.33 - - - 0.50 0.50 movbew %cx, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 1.00 1.00 1.00 1.00 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movbew (%rax), %cx +# CHECK-NEXT: 0.33 0.33 0.33 1.00 1.00 1.00 1.00 - - - - - - - - 0.33 0.33 0.33 - - - 0.50 0.50 movbel %ecx, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movbel (%rax), %ecx +# CHECK-NEXT: 0.33 0.33 0.33 1.00 1.00 1.00 1.00 - - - - - - - - 0.33 0.33 0.33 - - - 0.50 0.50 movbeq %rcx, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movbeq (%rax), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-mwaitx.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-mwaitx.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-mwaitx.s @@ -0,0 +1,51 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -instruction-tables < %s | FileCheck %s + +monitorx +mwaitx + +# CHECK: 
Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 100 100 25.00 U monitorx +# CHECK-NEXT: 100 100 25.00 U mwaitx + +# CHECK: Resources: +# CHECK-NEXT: [0] - Zn3AGU0 +# CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: [8] - Zn3FPP0 +# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: [14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# CHECK-NEXT: [15.0] - Zn3Load +# CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: [16.0] - Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: - - - 50.00 50.00 50.00 50.00 - - - - - - - - - - - - - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - monitorx +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - mwaitx diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-pclmul.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-pclmul.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-pclmul.s @@ -0,0 +1,51 @@ +# NOTE: Assertions have been autogenerated by 
utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -instruction-tables < %s | FileCheck %s + +pclmulqdq $11, %xmm0, %xmm2 +pclmulqdq $11, (%rax), %xmm2 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 4 4 2.00 pclmulqdq $11, %xmm0, %xmm2 +# CHECK-NEXT: 4 11 2.00 * pclmulqdq $11, (%rax), %xmm2 + +# CHECK: Resources: +# CHECK-NEXT: [0] - Zn3AGU0 +# CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: [8] - Zn3FPP0 +# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: [14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# CHECK-NEXT: [15.0] - Zn3Load +# CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: [16.0] - Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: +# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - - - - - - - - - - pclmulqdq $11, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pclmulqdq $11, (%rax), %xmm2 diff --git 
a/llvm/test/tools/llvm-mca/X86/Znver3/resources-popcnt.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-popcnt.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-popcnt.s @@ -0,0 +1,65 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -instruction-tables < %s | FileCheck %s + +popcntw %cx, %cx +popcntw (%rax), %cx + +popcntl %eax, %ecx +popcntl (%rax), %ecx + +popcntq %rax, %rcx +popcntq (%rax), %rcx + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 1.00 popcntw %cx, %cx +# CHECK-NEXT: 1 5 0.33 * popcntw (%rax), %cx +# CHECK-NEXT: 1 1 0.25 popcntl %eax, %ecx +# CHECK-NEXT: 1 5 0.33 * popcntl (%rax), %ecx +# CHECK-NEXT: 1 1 0.25 popcntq %rax, %rcx +# CHECK-NEXT: 1 5 0.33 * popcntq (%rax), %rcx + +# CHECK: Resources: +# CHECK-NEXT: [0] - Zn3AGU0 +# CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: [8] - Zn3FPP0 +# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: [14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# CHECK-NEXT: [15.0] - Zn3Load +# CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: [16.0] - Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: 1.00 1.00 
1.00 2.25 2.25 2.25 2.25 - - - - - - - - 1.00 1.00 1.00 1.00 1.00 1.00 - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - popcntw %cx, %cx +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - popcntw (%rax), %cx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - popcntl %eax, %ecx +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - popcntl (%rax), %ecx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - popcntq %rax, %rcx +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - popcntq (%rax), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-prefetchw.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-prefetchw.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-prefetchw.s @@ -0,0 +1,51 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -instruction-tables < %s | FileCheck %s + +prefetch (%rax) +prefetchw (%rax) + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 5 0.33 * * prefetch (%rax) +# CHECK-NEXT: 1 5 0.33 * * prefetchw (%rax) + +# CHECK: Resources: +# CHECK-NEXT: [0] - Zn3AGU0 +# CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: 
[8] - Zn3FPP0 +# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: [14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# CHECK-NEXT: [15.0] - Zn3Load +# CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: [16.0] - Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: 0.67 0.67 0.67 - - - - - - - - - - - - 0.67 0.67 0.67 0.67 0.67 0.67 - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - prefetch (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - prefetchw (%rax) diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-rdrand.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-rdrand.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-rdrand.s @@ -0,0 +1,54 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -instruction-tables < %s | FileCheck %s + +rdrand %ax +rdrand %eax +rdrand %rax + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 100 100 25.00 U rdrandw %ax +# CHECK-NEXT: 100 100 25.00 U rdrandl %eax +# CHECK-NEXT: 100 100 25.00 U rdrandq %rax + +# CHECK: 
Resources: +# CHECK-NEXT: [0] - Zn3AGU0 +# CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: [8] - Zn3FPP0 +# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: [14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# CHECK-NEXT: [15.0] - Zn3Load +# CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: [16.0] - Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: - - - 75.00 75.00 75.00 75.00 - - - - - - - - - - - - - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - rdrandw %ax +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - rdrandl %eax +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - rdrandq %rax diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-rdseed.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-rdseed.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-rdseed.s @@ -0,0 +1,54 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -instruction-tables < %s | FileCheck %s + +rdseed %ax +rdseed %eax +rdseed %rax + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# 
CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 100 100 25.00 U rdseedw %ax +# CHECK-NEXT: 100 100 25.00 U rdseedl %eax +# CHECK-NEXT: 100 100 25.00 U rdseedq %rax + +# CHECK: Resources: +# CHECK-NEXT: [0] - Zn3AGU0 +# CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: [8] - Zn3FPP0 +# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: [14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# CHECK-NEXT: [15.0] - Zn3Load +# CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: [16.0] - Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: - - - 75.00 75.00 75.00 75.00 - - - - - - - - - - - - - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - rdseedw %ax +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - rdseedl %eax +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - rdseedq %rax diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sha.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sha.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sha.s @@ -0,0 +1,93 @@ +# NOTE: 
Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -instruction-tables < %s | FileCheck %s + +sha1msg1 %xmm0, %xmm2 +sha1msg1 (%rax), %xmm2 + +sha1msg2 %xmm0, %xmm2 +sha1msg2 (%rax), %xmm2 + +sha1nexte %xmm0, %xmm2 +sha1nexte (%rax), %xmm2 + +sha1rnds4 $3, %xmm0, %xmm2 +sha1rnds4 $3, (%rax), %xmm2 + +sha256msg1 %xmm0, %xmm2 +sha256msg1 (%rax), %xmm2 + +sha256msg2 %xmm0, %xmm2 +sha256msg2 (%rax), %xmm2 + +sha256rnds2 %xmm0, %xmm2 +sha256rnds2 (%rax), %xmm2 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 2 2 0.50 sha1msg1 %xmm0, %xmm2 +# CHECK-NEXT: 2 6 0.50 * sha1msg1 (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 sha1msg2 %xmm0, %xmm2 +# CHECK-NEXT: 1 5 0.50 * sha1msg2 (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 sha1nexte %xmm0, %xmm2 +# CHECK-NEXT: 1 5 0.50 * sha1nexte (%rax), %xmm2 +# CHECK-NEXT: 1 6 2.00 sha1rnds4 $3, %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * sha1rnds4 $3, (%rax), %xmm2 +# CHECK-NEXT: 2 2 0.75 sha256msg1 %xmm0, %xmm2 +# CHECK-NEXT: 2 6 0.75 * sha256msg1 (%rax), %xmm2 +# CHECK-NEXT: 4 3 2.00 sha256msg2 %xmm0, %xmm2 +# CHECK-NEXT: 5 7 2.00 * sha256msg2 (%rax), %xmm2 +# CHECK-NEXT: 1 4 2.00 sha256rnds2 %xmm0, %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * sha256rnds2 %xmm0, (%rax), %xmm2 + +# CHECK: Resources: +# CHECK-NEXT: [0] - Zn3AGU0 +# CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: [8] - Zn3FPP0 +# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: 
[14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# CHECK-NEXT: [15.0] - Zn3Load +# CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: [16.0] - Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: 1.67 1.67 1.67 - - - - - 13.50 12.50 12.50 13.50 1.00 1.00 - 2.33 2.33 2.33 2.33 2.33 2.33 - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: +# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - sha1msg1 %xmm0, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.50 0.50 0.50 0.50 - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - sha1msg1 (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - sha1msg2 %xmm0, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.50 0.50 0.50 0.50 - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - sha1msg2 (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - sha1nexte %xmm0, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.50 0.50 0.50 0.50 - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - sha1nexte (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 2.00 2.00 - - - - - - - - - - - sha1rnds4 $3, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - sha1rnds4 $3, (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.75 0.75 0.75 0.75 - - - - - - - - - - - sha256msg1 %xmm0, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.75 0.75 0.75 0.75 - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - sha256msg1 (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 2.00 2.00 - - - - - - - - - - - sha256msg2 %xmm0, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 2.00 2.00 2.00 2.00 - - - 0.33 
0.33 0.33 0.33 0.33 0.33 - - sha256msg2 (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 2.00 2.00 - - - - - - - - - - - sha256rnds2 %xmm0, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - sha256rnds2 %xmm0, (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse1.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse1.s @@ -0,0 +1,476 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -instruction-tables < %s | FileCheck %s + +addps %xmm0, %xmm2 +addps (%rax), %xmm2 + +addss %xmm0, %xmm2 +addss (%rax), %xmm2 + +andnps %xmm0, %xmm2 +andnps (%rax), %xmm2 + +andps %xmm0, %xmm2 +andps (%rax), %xmm2 + +cmpps $0, %xmm0, %xmm2 +cmpps $0, (%rax), %xmm2 + +cmpss $0, %xmm0, %xmm2 +cmpss $0, (%rax), %xmm2 + +comiss %xmm0, %xmm1 +comiss (%rax), %xmm1 + +cvtpi2ps %mm0, %xmm2 +cvtpi2ps (%rax), %xmm2 + +cvtps2pi %xmm0, %mm2 +cvtps2pi (%rax), %mm2 + +cvtsi2ss %ecx, %xmm2 +cvtsi2ss %rcx, %xmm2 +cvtsi2ss (%rax), %xmm2 +cvtsi2ss (%rax), %xmm2 + +cvtss2si %xmm0, %ecx +cvtss2si %xmm0, %rcx +cvtss2si (%rax), %ecx +cvtss2si (%rax), %rcx + +cvttps2pi %xmm0, %mm2 +cvttps2pi (%rax), %mm2 + +cvttss2si %xmm0, %ecx +cvttss2si %xmm0, %rcx +cvttss2si (%rax), %ecx +cvttss2si (%rax), %rcx + +divps %xmm0, %xmm2 +divps (%rax), %xmm2 + +divss %xmm0, %xmm2 +divss (%rax), %xmm2 + +ldmxcsr (%rax) + +maskmovq %mm0, %mm1 + +maxps %xmm0, %xmm2 +maxps (%rax), %xmm2 + +maxss %xmm0, %xmm2 +maxss (%rax), %xmm2 + +minps %xmm0, %xmm2 +minps (%rax), %xmm2 + +minss %xmm0, %xmm2 +minss (%rax), %xmm2 + +movaps %xmm0, %xmm2 +movaps %xmm0, (%rax) +movaps (%rax), %xmm2 + +movhlps %xmm0, %xmm2 +movlhps %xmm0, %xmm2 + +movhps %xmm0, (%rax) +movhps (%rax), %xmm2 + +movlps %xmm0, (%rax) +movlps (%rax), %xmm2 + +movmskps %xmm0, %rcx + +movntps %xmm0, (%rax) 
+movntq %mm0, (%rax) + +movss %xmm0, %xmm2 +movss %xmm0, (%rax) +movss (%rax), %xmm2 + +movups %xmm0, %xmm2 +movups %xmm0, (%rax) +movups (%rax), %xmm2 + +mulps %xmm0, %xmm2 +mulps (%rax), %xmm2 + +mulss %xmm0, %xmm2 +mulss (%rax), %xmm2 + +orps %xmm0, %xmm2 +orps (%rax), %xmm2 + +pavgb %mm0, %mm2 +pavgb (%rax), %mm2 + +pavgw %mm0, %mm2 +pavgw (%rax), %mm2 + +pextrw $1, %mm0, %rcx + +pinsrw $1, %rax, %mm2 +pinsrw $1, (%rax), %mm2 + +pmaxsw %mm0, %mm2 +pmaxsw (%rax), %mm2 + +pmaxub %mm0, %mm2 +pmaxub (%rax), %mm2 + +pminsw %mm0, %mm2 +pminsw (%rax), %mm2 + +pminub %mm0, %mm2 +pminub (%rax), %mm2 + +pmovmskb %mm0, %rcx + +pmulhuw %mm0, %mm2 +pmulhuw (%rax), %mm2 + +prefetcht0 (%rax) +prefetcht1 (%rax) +prefetcht2 (%rax) +prefetchnta (%rax) + +psadbw %mm0, %mm2 +psadbw (%rax), %mm2 + +pshufw $1, %mm0, %mm2 +pshufw $1, (%rax), %mm2 + +rcpps %xmm0, %xmm2 +rcpps (%rax), %xmm2 + +rcpss %xmm0, %xmm2 +rcpss (%rax), %xmm2 + +rsqrtps %xmm0, %xmm2 +rsqrtps (%rax), %xmm2 + +rsqrtss %xmm0, %xmm2 +rsqrtss (%rax), %xmm2 + +sfence + +shufps $1, %xmm0, %xmm2 +shufps $1, (%rax), %xmm2 + +sqrtps %xmm0, %xmm2 +sqrtps (%rax), %xmm2 + +sqrtss %xmm0, %xmm2 +sqrtss (%rax), %xmm2 + +stmxcsr (%rax) + +subps %xmm0, %xmm2 +subps (%rax), %xmm2 + +subss %xmm0, %xmm2 +subss (%rax), %xmm2 + +ucomiss %xmm0, %xmm1 +ucomiss (%rax), %xmm1 + +unpckhps %xmm0, %xmm2 +unpckhps (%rax), %xmm2 + +unpcklps %xmm0, %xmm2 +unpcklps (%rax), %xmm2 + +xorps %xmm0, %xmm2 +xorps (%rax), %xmm2 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 0.50 addps %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * addps (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 addss %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * addss (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 andnps %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * andnps (%rax), %xmm2 
+# CHECK-NEXT: 1 1 0.25 andps %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * andps (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 cmpeqps %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * cmpeqps (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 cmpeqss %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * cmpeqss (%rax), %xmm2 +# CHECK-NEXT: 2 4 1.00 comiss %xmm0, %xmm1 +# CHECK-NEXT: 2 11 1.00 * comiss (%rax), %xmm1 +# CHECK-NEXT: 2 3 0.50 cvtpi2ps %mm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * cvtpi2ps (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 cvtps2pi %xmm0, %mm2 +# CHECK-NEXT: 1 10 0.50 * cvtps2pi (%rax), %mm2 +# CHECK-NEXT: 2 4 1.00 cvtsi2ss %ecx, %xmm2 +# CHECK-NEXT: 2 4 1.00 cvtsi2ss %rcx, %xmm2 +# CHECK-NEXT: 1 10 1.00 * cvtsi2ssl (%rax), %xmm2 +# CHECK-NEXT: 1 10 1.00 * cvtsi2ssl (%rax), %xmm2 +# CHECK-NEXT: 2 2 1.00 cvtss2si %xmm0, %ecx +# CHECK-NEXT: 2 2 1.00 cvtss2si %xmm0, %rcx +# CHECK-NEXT: 2 9 1.00 * cvtss2si (%rax), %ecx +# CHECK-NEXT: 2 9 1.00 * cvtss2si (%rax), %rcx +# CHECK-NEXT: 1 3 0.50 cvttps2pi %xmm0, %mm2 +# CHECK-NEXT: 1 10 0.50 * cvttps2pi (%rax), %mm2 +# CHECK-NEXT: 2 2 1.00 cvttss2si %xmm0, %ecx +# CHECK-NEXT: 2 2 1.00 cvttss2si %xmm0, %rcx +# CHECK-NEXT: 2 9 1.00 * cvttss2si (%rax), %ecx +# CHECK-NEXT: 2 9 1.00 * cvttss2si (%rax), %rcx +# CHECK-NEXT: 1 11 3.00 divps %xmm0, %xmm2 +# CHECK-NEXT: 1 18 3.00 * divps (%rax), %xmm2 +# CHECK-NEXT: 1 11 3.00 divss %xmm0, %xmm2 +# CHECK-NEXT: 1 18 3.00 * divss (%rax), %xmm2 +# CHECK-NEXT: 1 5 1.50 * * U ldmxcsr (%rax) +# CHECK-NEXT: 1 1 0.50 * * U maskmovq %mm0, %mm1 +# CHECK-NEXT: 1 1 0.50 maxps %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * maxps (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 maxss %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * maxss (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 minps %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * minps (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 minss %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * minss (%rax), %xmm2 +# CHECK-NEXT: 1 0 0.25 movaps %xmm0, %xmm2 +# CHECK-NEXT: 1 1 1.00 * movaps %xmm0, (%rax) +# CHECK-NEXT: 1 8 0.50 * movaps (%rax), %xmm2 +# 
CHECK-NEXT: 1 1 0.50 movhlps %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 movlhps %xmm0, %xmm2 +# CHECK-NEXT: 2 2 1.00 * movhps %xmm0, (%rax) +# CHECK-NEXT: 1 8 0.50 * movhps (%rax), %xmm2 +# CHECK-NEXT: 1 1 1.00 * movlps %xmm0, (%rax) +# CHECK-NEXT: 1 8 0.50 * movlps (%rax), %xmm2 +# CHECK-NEXT: 1 1 1.00 movmskps %xmm0, %ecx +# CHECK-NEXT: 1 1 1.00 * movntps %xmm0, (%rax) +# CHECK-NEXT: 1 1 1.00 * * U movntq %mm0, (%rax) +# CHECK-NEXT: 1 1 0.50 movss %xmm0, %xmm2 +# CHECK-NEXT: 1 1 1.00 * movss %xmm0, (%rax) +# CHECK-NEXT: 1 8 0.50 * movss (%rax), %xmm2 +# CHECK-NEXT: 1 0 0.25 movups %xmm0, %xmm2 +# CHECK-NEXT: 1 1 1.00 * movups %xmm0, (%rax) +# CHECK-NEXT: 1 8 0.50 * movups (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 mulps %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * mulps (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 mulss %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * mulss (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 orps %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * orps (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pavgb %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * pavgb (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 pavgw %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * pavgw (%rax), %mm2 +# CHECK-NEXT: 2 1 1.00 pextrw $1, %mm0, %ecx +# CHECK-NEXT: 2 2 1.00 pinsrw $1, %eax, %mm2 +# CHECK-NEXT: 1 8 1.50 * pinsrw $1, (%rax), %mm2 +# CHECK-NEXT: 1 1 0.25 pmaxsw %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * pmaxsw (%rax), %mm2 +# CHECK-NEXT: 1 1 0.25 pmaxub %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * pmaxub (%rax), %mm2 +# CHECK-NEXT: 1 1 0.25 pminsw %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * pminsw (%rax), %mm2 +# CHECK-NEXT: 1 1 0.25 pminub %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * pminub (%rax), %mm2 +# CHECK-NEXT: 1 1 1.00 pmovmskb %mm0, %ecx +# CHECK-NEXT: 1 3 0.50 pmulhuw %mm0, %mm2 +# CHECK-NEXT: 1 10 0.50 * pmulhuw (%rax), %mm2 +# CHECK-NEXT: 1 5 0.33 * * prefetcht0 (%rax) +# CHECK-NEXT: 1 5 0.33 * * prefetcht1 (%rax) +# CHECK-NEXT: 1 5 0.33 * * prefetcht2 (%rax) +# CHECK-NEXT: 1 5 0.33 * * prefetchnta (%rax) +# CHECK-NEXT: 1 3 0.50 psadbw %mm0, %mm2 +# 
CHECK-NEXT: 1 10 0.50 * psadbw (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 pshufw $1, %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * pshufw $1, (%rax), %mm2 +# CHECK-NEXT: 1 3 0.50 rcpps %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * rcpps (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 rcpss %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * rcpss (%rax), %xmm2 +# CHECK-NEXT: 1 3 1.00 rsqrtps %xmm0, %xmm2 +# CHECK-NEXT: 1 10 1.00 * rsqrtps (%rax), %xmm2 +# CHECK-NEXT: 1 3 1.00 rsqrtss %xmm0, %xmm2 +# CHECK-NEXT: 1 10 1.00 * rsqrtss (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.33 * * U sfence +# CHECK-NEXT: 1 1 0.50 shufps $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * shufps $1, (%rax), %xmm2 +# CHECK-NEXT: 1 15 5.00 sqrtps %xmm0, %xmm2 +# CHECK-NEXT: 1 22 5.00 * sqrtps (%rax), %xmm2 +# CHECK-NEXT: 1 15 5.00 sqrtss %xmm0, %xmm2 +# CHECK-NEXT: 1 22 5.00 * sqrtss (%rax), %xmm2 +# CHECK-NEXT: 2 2 15.00 * U stmxcsr (%rax) +# CHECK-NEXT: 1 3 0.50 subps %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * subps (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 subss %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * subss (%rax), %xmm2 +# CHECK-NEXT: 2 4 1.00 ucomiss %xmm0, %xmm1 +# CHECK-NEXT: 2 11 1.00 * ucomiss (%rax), %xmm1 +# CHECK-NEXT: 1 1 0.50 unpckhps %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * unpckhps (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 unpcklps %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * unpcklps (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 xorps %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * xorps (%rax), %xmm2 + +# CHECK: Resources: +# CHECK-NEXT: [0] - Zn3AGU0 +# CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: [8] - Zn3FPP0 +# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: [14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# 
CHECK-NEXT: [15.0] - Zn3Load +# CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: [16.0] - Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: 2.00 2.00 2.00 16.50 16.50 16.50 16.50 - 22.00 64.00 34.00 26.00 33.00 33.00 7.00 22.33 22.33 22.33 19.33 19.33 19.33 4.00 4.00 + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - addps %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - addps (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - addss %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - addss (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - andnps %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - andnps (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - andps %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - andps (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - cmpeqps %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmpeqps (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - cmpeqss %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmpeqss (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - comiss %xmm0, %xmm1 +# 
CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - comiss (%rax), %xmm1 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - cvtpi2ps %mm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - cvtpi2ps (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - cvtps2pi %xmm0, %mm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - cvtps2pi (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - cvtsi2ss %ecx, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - cvtsi2ss %rcx, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - cvtsi2ssl (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - cvtsi2ssl (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - cvtss2si %xmm0, %ecx +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - cvtss2si %xmm0, %rcx +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - cvtss2si (%rax), %ecx +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - cvtss2si (%rax), %rcx +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - cvttps2pi %xmm0, %mm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - cvttps2pi (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - cvttss2si %xmm0, %ecx +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - cvttss2si %xmm0, %rcx +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - cvttss2si (%rax), %ecx +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - cvttss2si (%rax), %rcx +# CHECK-NEXT: - - - - - - - - - 3.00 - - - - - - - - - - - - - divps 
%xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 3.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - divps (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 3.00 - - - - - - - - - - - - - divss %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 3.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - divss (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 1.50 1.50 1.50 1.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - ldmxcsr (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - maskmovq %mm0, %mm1 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - maxps %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - maxps (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - maxss %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - maxss (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - minps %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - minps (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - minss %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - minss (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - movaps %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 movaps %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - movaps (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - movhlps %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - movlhps %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 movhps %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - movhps (%rax), 
%xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 movlps %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - movlps (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - - - - - - movmskps %xmm0, %ecx +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 movntps %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 movntq %mm0, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - movss %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 movss %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - movss (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - movups %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 movups %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - movups (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - mulps %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - mulps (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - mulss %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - mulss (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - orps %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - orps (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - pavgb %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pavgb (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - pavgw 
%mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pavgw (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - pextrw $1, %mm0, %ecx +# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - pinsrw $1, %eax, %mm2 +# CHECK-NEXT: - - - - - - - - - - - - 1.50 1.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pinsrw $1, (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - pmaxsw %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmaxsw (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - pmaxub %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmaxub (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - pminsw %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pminsw (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - pminub %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pminub (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - - - - - - pmovmskb %mm0, %ecx +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - pmulhuw %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmulhuw (%rax), %mm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - prefetcht0 (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - prefetcht1 (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - prefetcht2 (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - prefetchnta (%rax) +# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 
0.50 - - - - - - - - - - - psadbw %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - psadbw (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - pshufw $1, %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pshufw $1, (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - rcpps %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - rcpps (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - rcpss %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - rcpss (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - rsqrtps %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - rsqrtps (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - rsqrtss %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - rsqrtss (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - - - - 0.33 0.33 0.33 - - - - - sfence +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - shufps $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - shufps $1, (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 5.00 - - - - - - - - - - - - - sqrtps %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 5.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - sqrtps (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 5.00 - - - - - - - - - - - - - sqrtss %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 5.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - sqrtss (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 15.00 15.00 15.00 15.00 - - - - - - - - 0.33 0.33 0.33 - - - 0.50 0.50 stmxcsr (%rax) +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - 
- - subps %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - subps (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - subss %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - subss (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - ucomiss %xmm0, %xmm1 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - ucomiss (%rax), %xmm1 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - unpckhps %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - unpckhps (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - unpcklps %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - unpcklps (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - xorps %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - xorps (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse2.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse2.s @@ -0,0 +1,975 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -instruction-tables < %s | FileCheck %s + +addpd %xmm0, %xmm2 +addpd (%rax), %xmm2 + +addsd %xmm0, %xmm2 +addsd (%rax), %xmm2 + +andnpd %xmm0, %xmm2 +andnpd (%rax), %xmm2 + +andpd %xmm0, %xmm2 +andpd (%rax), %xmm2 + +clflush (%rax) + +cmppd $0, %xmm0, %xmm2 +cmppd $0, (%rax), %xmm2 + +cmpsd $0, %xmm0, %xmm2 +cmpsd $0, (%rax), %xmm2 + +comisd %xmm0, %xmm1 +comisd (%rax), %xmm1 + +cvtdq2pd %xmm0, %xmm2 +cvtdq2pd (%rax), %xmm2 + +cvtdq2ps %xmm0, %xmm2 +cvtdq2ps (%rax), %xmm2 + 
+cvtpd2dq %xmm0, %xmm2 +cvtpd2dq (%rax), %xmm2 + +cvtpd2pi %xmm0, %mm2 +cvtpd2pi (%rax), %mm2 + +cvtpd2ps %xmm0, %xmm2 +cvtpd2ps (%rax), %xmm2 + +cvtpi2pd %mm0, %xmm2 +cvtpi2pd (%rax), %xmm2 + +cvtps2dq %xmm0, %xmm2 +cvtps2dq (%rax), %xmm2 + +cvtps2pd %xmm0, %xmm2 +cvtps2pd (%rax), %xmm2 + +cvtsd2si %xmm0, %ecx +cvtsd2si %xmm0, %rcx +cvtsd2si (%rax), %ecx +cvtsd2si (%rax), %rcx + +cvtsd2ss %xmm0, %xmm2 +cvtsd2ss (%rax), %xmm2 + +cvtsi2sd %ecx, %xmm2 +cvtsi2sd %rcx, %xmm2 +cvtsi2sd (%rax), %xmm2 +cvtsi2sd (%rax), %xmm2 + +cvtss2sd %xmm0, %xmm2 +cvtss2sd (%rax), %xmm2 + +cvttpd2dq %xmm0, %xmm2 +cvttpd2dq (%rax), %xmm2 + +cvttpd2pi %xmm0, %mm2 +cvttpd2pi (%rax), %mm2 + +cvttps2dq %xmm0, %xmm2 +cvttps2dq (%rax), %xmm2 + +cvttsd2si %xmm0, %ecx +cvttsd2si %xmm0, %rcx +cvttsd2si (%rax), %ecx +cvttsd2si (%rax), %rcx + +divpd %xmm0, %xmm2 +divpd (%rax), %xmm2 + +divsd %xmm0, %xmm2 +divsd (%rax), %xmm2 + +lfence + +maskmovdqu %xmm0, %xmm1 + +maxpd %xmm0, %xmm2 +maxpd (%rax), %xmm2 + +maxsd %xmm0, %xmm2 +maxsd (%rax), %xmm2 + +mfence + +minpd %xmm0, %xmm2 +minpd (%rax), %xmm2 + +minsd %xmm0, %xmm2 +minsd (%rax), %xmm2 + +movapd %xmm0, %xmm2 +movapd %xmm0, (%rax) +movapd (%rax), %xmm2 + +movd %eax, %xmm2 +movd (%rax), %xmm2 + +movd %xmm0, %ecx +movd %xmm0, (%rax) + +movdqa %xmm0, %xmm2 +movdqa %xmm0, (%rax) +movdqa (%rax), %xmm2 + +movdqu %xmm0, %xmm2 +movdqu %xmm0, (%rax) +movdqu (%rax), %xmm2 + +movdq2q %xmm0, %mm2 + +movhpd %xmm0, (%rax) +movhpd (%rax), %xmm2 + +movlpd %xmm0, (%rax) +movlpd (%rax), %xmm2 + +movmskpd %xmm0, %rcx + +movntil %eax, (%rax) +movntiq %rax, (%rax) + +movntdq %xmm0, (%rax) +movntpd %xmm0, (%rax) + +movq %xmm0, %xmm2 + +movq %rax, %xmm2 +movq (%rax), %xmm2 + +movq %xmm0, %rcx +movq %xmm0, (%rax) + +movq2dq %mm0, %xmm2 + +movsd %xmm0, %xmm2 +movsd %xmm0, (%rax) +movsd (%rax), %xmm2 + +movupd %xmm0, %xmm2 +movupd %xmm0, (%rax) +movupd (%rax), %xmm2 + +mulpd %xmm0, %xmm2 +mulpd (%rax), %xmm2 + +mulsd %xmm0, %xmm2 +mulsd (%rax), %xmm2 + +orpd %xmm0, 
%xmm2 +orpd (%rax), %xmm2 + +packssdw %xmm0, %xmm2 +packssdw (%rax), %xmm2 + +packsswb %xmm0, %xmm2 +packsswb (%rax), %xmm2 + +packuswb %xmm0, %xmm2 +packuswb (%rax), %xmm2 + +paddb %xmm0, %xmm2 +paddb (%rax), %xmm2 + +paddd %xmm0, %xmm2 +paddd (%rax), %xmm2 + +paddq %mm0, %mm2 +paddq (%rax), %mm2 + +paddq %xmm0, %xmm2 +paddq (%rax), %xmm2 + +paddsb %xmm0, %xmm2 +paddsb (%rax), %xmm2 + +paddsw %xmm0, %xmm2 +paddsw (%rax), %xmm2 + +paddusb %xmm0, %xmm2 +paddusb (%rax), %xmm2 + +paddusw %xmm0, %xmm2 +paddusw (%rax), %xmm2 + +paddw %xmm0, %xmm2 +paddw (%rax), %xmm2 + +pand %xmm0, %xmm2 +pand (%rax), %xmm2 + +pandn %xmm0, %xmm2 +pandn (%rax), %xmm2 + +pavgb %xmm0, %xmm2 +pavgb (%rax), %xmm2 + +pavgw %xmm0, %xmm2 +pavgw (%rax), %xmm2 + +pcmpeqb %xmm0, %xmm2 +pcmpeqb (%rax), %xmm2 + +pcmpeqd %xmm0, %xmm2 +pcmpeqd (%rax), %xmm2 + +pcmpeqw %xmm0, %xmm2 +pcmpeqw (%rax), %xmm2 + +pcmpgtb %xmm0, %xmm2 +pcmpgtb (%rax), %xmm2 + +pcmpgtd %xmm0, %xmm2 +pcmpgtd (%rax), %xmm2 + +pcmpgtw %xmm0, %xmm2 +pcmpgtw (%rax), %xmm2 + +pextrw $1, %xmm0, %rcx + +pinsrw $1, %rax, %xmm0 +pinsrw $1, (%rax), %xmm0 + +pmaddwd %xmm0, %xmm2 +pmaddwd (%rax), %xmm2 + +pmaxsw %xmm0, %xmm2 +pmaxsw (%rax), %xmm2 + +pmaxub %xmm0, %xmm2 +pmaxub (%rax), %xmm2 + +pminsw %xmm0, %xmm2 +pminsw (%rax), %xmm2 + +pminub %xmm0, %xmm2 +pminub (%rax), %xmm2 + +pmovmskb %xmm0, %rcx + +pmulhuw %xmm0, %xmm2 +pmulhuw (%rax), %xmm2 + +pmulhw %xmm0, %xmm2 +pmulhw (%rax), %xmm2 + +pmullw %xmm0, %xmm2 +pmullw (%rax), %xmm2 + +pmuludq %mm0, %mm2 +pmuludq (%rax), %mm2 + +pmuludq %xmm0, %xmm2 +pmuludq (%rax), %xmm2 + +por %xmm0, %xmm2 +por (%rax), %xmm2 + +psadbw %xmm0, %xmm2 +psadbw (%rax), %xmm2 + +pshufd $1, %xmm0, %xmm2 +pshufd $1, (%rax), %xmm2 + +pshufhw $1, %xmm0, %xmm2 +pshufhw $1, (%rax), %xmm2 + +pshuflw $1, %xmm0, %xmm2 +pshuflw $1, (%rax), %xmm2 + +pslld $1, %xmm2 +pslld %xmm0, %xmm2 +pslld (%rax), %xmm2 + +pslldq $1, %xmm2 + +psllq $1, %xmm2 +psllq %xmm0, %xmm2 +psllq (%rax), %xmm2 + +psllw $1, %xmm2 +psllw %xmm0, 
%xmm2 +psllw (%rax), %xmm2 + +psrad $1, %xmm2 +psrad %xmm0, %xmm2 +psrad (%rax), %xmm2 + +psraw $1, %xmm2 +psraw %xmm0, %xmm2 +psraw (%rax), %xmm2 + +psrld $1, %xmm2 +psrld %xmm0, %xmm2 +psrld (%rax), %xmm2 + +psrldq $1, %xmm2 + +psrlq $1, %xmm2 +psrlq %xmm0, %xmm2 +psrlq (%rax), %xmm2 + +psrlw $1, %xmm2 +psrlw %xmm0, %xmm2 +psrlw (%rax), %xmm2 + +psubb %xmm0, %xmm2 +psubb (%rax), %xmm2 + +psubd %xmm0, %xmm2 +psubd (%rax), %xmm2 + +psubq %mm0, %mm2 +psubq (%rax), %mm2 + +psubq %xmm0, %xmm2 +psubq (%rax), %xmm2 + +psubsb %xmm0, %xmm2 +psubsb (%rax), %xmm2 + +psubsw %xmm0, %xmm2 +psubsw (%rax), %xmm2 + +psubusb %xmm0, %xmm2 +psubusb (%rax), %xmm2 + +psubusw %xmm0, %xmm2 +psubusw (%rax), %xmm2 + +psubw %xmm0, %xmm2 +psubw (%rax), %xmm2 + +punpckhbw %xmm0, %xmm2 +punpckhbw (%rax), %xmm2 + +punpckhdq %xmm0, %xmm2 +punpckhdq (%rax), %xmm2 + +punpckhqdq %xmm0, %xmm2 +punpckhqdq (%rax), %xmm2 + +punpckhwd %xmm0, %xmm2 +punpckhwd (%rax), %xmm2 + +punpcklbw %xmm0, %xmm2 +punpcklbw (%rax), %xmm2 + +punpckldq %xmm0, %xmm2 +punpckldq (%rax), %xmm2 + +punpcklqdq %xmm0, %xmm2 +punpcklqdq (%rax), %xmm2 + +punpcklwd %xmm0, %xmm2 +punpcklwd (%rax), %xmm2 + +pxor %xmm0, %xmm2 +pxor (%rax), %xmm2 + +shufpd $1, %xmm0, %xmm2 +shufpd $1, (%rax), %xmm2 + +sqrtpd %xmm0, %xmm2 +sqrtpd (%rax), %xmm2 + +sqrtsd %xmm0, %xmm2 +sqrtsd (%rax), %xmm2 + +subpd %xmm0, %xmm2 +subpd (%rax), %xmm2 + +subsd %xmm0, %xmm2 +subsd (%rax), %xmm2 + +ucomisd %xmm0, %xmm1 +ucomisd (%rax), %xmm1 + +unpckhpd %xmm0, %xmm2 +unpckhpd (%rax), %xmm2 + +unpcklpd %xmm0, %xmm2 +unpcklpd (%rax), %xmm2 + +xorpd %xmm0, %xmm2 +xorpd (%rax), %xmm2 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 0.50 addpd %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * addpd (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 addsd 
%xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * addsd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 andnpd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * andnpd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 andpd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * andpd (%rax), %xmm2 +# CHECK-NEXT: 1 5 0.33 * * U clflush (%rax) +# CHECK-NEXT: 1 1 0.50 cmpeqpd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * cmpeqpd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 cmpeqsd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * cmpeqsd (%rax), %xmm2 +# CHECK-NEXT: 2 4 1.00 comisd %xmm0, %xmm1 +# CHECK-NEXT: 2 11 1.00 * comisd (%rax), %xmm1 +# CHECK-NEXT: 1 3 0.50 cvtdq2pd %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * cvtdq2pd (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 cvtdq2ps %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * cvtdq2ps (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 cvtpd2dq %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * cvtpd2dq (%rax), %xmm2 +# CHECK-NEXT: 2 1 1.00 cvtpd2pi %xmm0, %mm2 +# CHECK-NEXT: 2 1 1.00 * cvtpd2pi (%rax), %mm2 +# CHECK-NEXT: 1 3 0.50 cvtpd2ps %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * cvtpd2ps (%rax), %xmm2 +# CHECK-NEXT: 2 2 3.00 cvtpi2pd %mm0, %xmm2 +# CHECK-NEXT: 2 2 3.00 * cvtpi2pd (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 cvtps2dq %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * cvtps2dq (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 cvtps2pd %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * cvtps2pd (%rax), %xmm2 +# CHECK-NEXT: 2 2 1.00 cvtsd2si %xmm0, %ecx +# CHECK-NEXT: 2 2 1.00 cvtsd2si %xmm0, %rcx +# CHECK-NEXT: 2 9 1.00 * cvtsd2si (%rax), %ecx +# CHECK-NEXT: 2 9 1.00 * cvtsd2si (%rax), %rcx +# CHECK-NEXT: 1 3 0.50 cvtsd2ss %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * cvtsd2ss (%rax), %xmm2 +# CHECK-NEXT: 2 4 1.00 cvtsi2sd %ecx, %xmm2 +# CHECK-NEXT: 2 4 1.00 cvtsi2sd %rcx, %xmm2 +# CHECK-NEXT: 1 10 1.00 * cvtsi2sdl (%rax), %xmm2 +# CHECK-NEXT: 1 10 1.00 * cvtsi2sdl (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 cvtss2sd %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * cvtss2sd (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 cvttpd2dq %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * cvttpd2dq (%rax), %xmm2 +# 
CHECK-NEXT: 2 1 1.00 cvttpd2pi %xmm0, %mm2 +# CHECK-NEXT: 2 1 1.00 * cvttpd2pi (%rax), %mm2 +# CHECK-NEXT: 1 3 0.50 cvttps2dq %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * cvttps2dq (%rax), %xmm2 +# CHECK-NEXT: 2 2 1.00 cvttsd2si %xmm0, %ecx +# CHECK-NEXT: 2 2 1.00 cvttsd2si %xmm0, %rcx +# CHECK-NEXT: 2 9 1.00 * cvttsd2si (%rax), %ecx +# CHECK-NEXT: 2 9 1.00 * cvttsd2si (%rax), %rcx +# CHECK-NEXT: 1 13 5.00 divpd %xmm0, %xmm2 +# CHECK-NEXT: 1 20 5.00 * divpd (%rax), %xmm2 +# CHECK-NEXT: 1 13 5.00 divsd %xmm0, %xmm2 +# CHECK-NEXT: 1 20 5.00 * divsd (%rax), %xmm2 +# CHECK-NEXT: 1 1 10.00 * * U lfence +# CHECK-NEXT: 1 1 1.00 * * U maskmovdqu %xmm0, %xmm1 +# CHECK-NEXT: 1 1 0.50 maxpd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * maxpd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 maxsd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * maxsd (%rax), %xmm2 +# CHECK-NEXT: 1 1 25.00 * * U mfence +# CHECK-NEXT: 1 1 0.50 minpd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * minpd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 minsd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * minsd (%rax), %xmm2 +# CHECK-NEXT: 1 0 0.25 movapd %xmm0, %xmm2 +# CHECK-NEXT: 1 1 1.00 * movapd %xmm0, (%rax) +# CHECK-NEXT: 1 8 0.50 * movapd (%rax), %xmm2 +# CHECK-NEXT: 1 1 1.00 movd %eax, %xmm2 +# CHECK-NEXT: 1 8 0.50 * movd (%rax), %xmm2 +# CHECK-NEXT: 1 1 1.00 movd %xmm0, %ecx +# CHECK-NEXT: 1 1 1.00 * movd %xmm0, (%rax) +# CHECK-NEXT: 1 0 0.25 movdqa %xmm0, %xmm2 +# CHECK-NEXT: 1 1 1.00 * movdqa %xmm0, (%rax) +# CHECK-NEXT: 1 8 0.50 * movdqa (%rax), %xmm2 +# CHECK-NEXT: 1 0 0.25 movdqu %xmm0, %xmm2 +# CHECK-NEXT: 1 1 1.00 * movdqu %xmm0, (%rax) +# CHECK-NEXT: 1 8 0.50 * movdqu (%rax), %xmm2 +# CHECK-NEXT: 1 0 0.25 movdq2q %xmm0, %mm2 +# CHECK-NEXT: 2 2 1.00 * movhpd %xmm0, (%rax) +# CHECK-NEXT: 1 8 0.50 * movhpd (%rax), %xmm2 +# CHECK-NEXT: 1 1 1.00 * movlpd %xmm0, (%rax) +# CHECK-NEXT: 1 8 0.50 * movlpd (%rax), %xmm2 +# CHECK-NEXT: 1 1 1.00 movmskpd %xmm0, %ecx +# CHECK-NEXT: 1 1 1.00 * movntil %eax, (%rax) +# CHECK-NEXT: 1 1 1.00 * movntiq %rax, 
(%rax) +# CHECK-NEXT: 1 1 1.00 * movntdq %xmm0, (%rax) +# CHECK-NEXT: 1 1 1.00 * movntpd %xmm0, (%rax) +# CHECK-NEXT: 1 1 0.25 movq %xmm0, %xmm2 +# CHECK-NEXT: 1 1 1.00 movq %rax, %xmm2 +# CHECK-NEXT: 1 8 0.50 * movq (%rax), %xmm2 +# CHECK-NEXT: 1 1 1.00 movq %xmm0, %rcx +# CHECK-NEXT: 1 1 1.00 * movq %xmm0, (%rax) +# CHECK-NEXT: 2 1 0.50 movq2dq %mm0, %xmm2 +# CHECK-NEXT: 1 1 0.50 movsd %xmm0, %xmm2 +# CHECK-NEXT: 1 1 1.00 * movsd %xmm0, (%rax) +# CHECK-NEXT: 1 8 0.50 * movsd (%rax), %xmm2 +# CHECK-NEXT: 1 0 0.25 movupd %xmm0, %xmm2 +# CHECK-NEXT: 1 1 1.00 * movupd %xmm0, (%rax) +# CHECK-NEXT: 1 8 0.50 * movupd (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 mulpd %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * mulpd (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 mulsd %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * mulsd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 orpd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * orpd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 packssdw %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * packssdw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 packsswb %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * packsswb (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 packuswb %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * packuswb (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 paddb %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * paddb (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 paddd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * paddd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 paddq %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * paddq (%rax), %mm2 +# CHECK-NEXT: 1 1 0.25 paddq %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * paddq (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 paddsb %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * paddsb (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 paddsw %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * paddsw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 paddusb %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * paddusb (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 paddusw %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * paddusw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 paddw %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * paddw (%rax), %xmm2 +# 
CHECK-NEXT: 1 1 0.25 pand %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pand (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 pandn %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pandn (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pavgb %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pavgb (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pavgw %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pavgw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 pcmpeqb %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pcmpeqb (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 pcmpeqd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pcmpeqd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 pcmpeqw %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pcmpeqw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 pcmpgtb %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pcmpgtb (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 pcmpgtd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pcmpgtd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 pcmpgtw %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pcmpgtw (%rax), %xmm2 +# CHECK-NEXT: 2 1 1.00 pextrw $1, %xmm0, %ecx +# CHECK-NEXT: 2 2 1.00 pinsrw $1, %eax, %xmm0 +# CHECK-NEXT: 1 8 1.50 * pinsrw $1, (%rax), %xmm0 +# CHECK-NEXT: 1 3 0.50 pmaddwd %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * pmaddwd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 pmaxsw %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pmaxsw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 pmaxub %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pmaxub (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 pminsw %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pminsw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 pminub %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pminub (%rax), %xmm2 +# CHECK-NEXT: 1 1 1.00 pmovmskb %xmm0, %ecx +# CHECK-NEXT: 1 3 0.50 pmulhuw %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * pmulhuw (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 pmulhw %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * pmulhw (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 pmullw %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * pmullw (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 pmuludq %mm0, %mm2 +# CHECK-NEXT: 1 10 0.50 * pmuludq (%rax), %mm2 +# CHECK-NEXT: 1 3 0.50 pmuludq %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * 
pmuludq (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 por %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * por (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 psadbw %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * psadbw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pshufd $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pshufd $1, (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pshufhw $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pshufhw $1, (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pshuflw $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pshuflw $1, (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pslld $1, %xmm2 +# CHECK-NEXT: 1 1 0.50 pslld %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pslld (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pslldq $1, %xmm2 +# CHECK-NEXT: 1 1 0.50 psllq $1, %xmm2 +# CHECK-NEXT: 1 1 0.50 psllq %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * psllq (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 psllw $1, %xmm2 +# CHECK-NEXT: 1 1 0.50 psllw %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * psllw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 psrad $1, %xmm2 +# CHECK-NEXT: 1 1 0.50 psrad %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * psrad (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 psraw $1, %xmm2 +# CHECK-NEXT: 1 1 0.50 psraw %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * psraw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 psrld $1, %xmm2 +# CHECK-NEXT: 1 1 0.50 psrld %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * psrld (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 psrldq $1, %xmm2 +# CHECK-NEXT: 1 1 0.50 psrlq $1, %xmm2 +# CHECK-NEXT: 1 1 0.50 psrlq %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * psrlq (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 psrlw $1, %xmm2 +# CHECK-NEXT: 1 1 0.50 psrlw %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * psrlw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 psubb %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * psubb (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 psubd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * psubd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 psubq %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * psubq (%rax), %mm2 +# CHECK-NEXT: 1 1 0.25 psubq %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * psubq (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 psubsb 
%xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * psubsb (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 psubsw %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * psubsw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 psubusb %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * psubusb (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 psubusw %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * psubusw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 psubw %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * psubw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 punpckhbw %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * punpckhbw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 punpckhdq %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * punpckhdq (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 punpckhqdq %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * punpckhqdq (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 punpckhwd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * punpckhwd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 punpcklbw %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * punpcklbw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 punpckldq %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * punpckldq (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 punpcklqdq %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * punpcklqdq (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 punpcklwd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * punpcklwd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 pxor %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pxor (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 shufpd $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * shufpd $1, (%rax), %xmm2 +# CHECK-NEXT: 1 21 9.00 sqrtpd %xmm0, %xmm2 +# CHECK-NEXT: 1 28 9.00 * sqrtpd (%rax), %xmm2 +# CHECK-NEXT: 1 21 9.00 sqrtsd %xmm0, %xmm2 +# CHECK-NEXT: 1 28 9.00 * sqrtsd (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 subpd %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * subpd (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 subsd %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * subsd (%rax), %xmm2 +# CHECK-NEXT: 2 4 1.00 ucomisd %xmm0, %xmm1 +# CHECK-NEXT: 2 11 1.00 * ucomisd (%rax), %xmm1 +# CHECK-NEXT: 1 1 0.50 unpckhpd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * unpckhpd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 unpcklpd %xmm0, %xmm2 +# 
CHECK-NEXT: 1 8 0.50 * unpcklpd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 xorpd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * xorpd (%rax), %xmm2 + +# CHECK: Resources: +# CHECK-NEXT: [0] - Zn3AGU0 +# CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: [8] - Zn3FPP0 +# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: [14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# CHECK-NEXT: [15.0] - Zn3Load +# CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: [16.0] - Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: 1.00 1.00 1.00 25.00 25.00 25.00 25.00 - 42.50 124.00 89.00 61.50 70.50 70.50 12.00 53.67 53.67 53.67 38.33 38.33 38.33 8.00 8.00 + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - addpd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - addpd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - addsd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - addsd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - andnpd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - andnpd (%rax), %xmm2 +# 
CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - andpd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - andpd (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - clflush (%rax) +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - cmpeqpd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmpeqpd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - cmpeqsd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmpeqsd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - comisd %xmm0, %xmm1 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - comisd (%rax), %xmm1 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - cvtdq2pd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - cvtdq2pd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - cvtdq2ps %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - cvtdq2ps (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - cvtpd2dq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - cvtpd2dq (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - cvtpd2pi %xmm0, %mm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - cvtpd2pi (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - cvtpd2ps %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - cvtpd2ps (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 3.00 3.00 - - - - - - - - - - - cvtpi2pd %mm0, %xmm2 +# 
CHECK-NEXT: - - - - - - - - - - 3.00 3.00 - - - - - - - - - - - cvtpi2pd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - cvtps2dq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - cvtps2dq (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - cvtps2pd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - cvtps2pd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - cvtsd2si %xmm0, %ecx +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - cvtsd2si %xmm0, %rcx +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - cvtsd2si (%rax), %ecx +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - cvtsd2si (%rax), %rcx +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - cvtsd2ss %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - cvtsd2ss (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - cvtsi2sd %ecx, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - cvtsi2sd %rcx, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - cvtsi2sdl (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - cvtsi2sdl (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - cvtss2sd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - cvtss2sd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - cvttpd2dq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - cvttpd2dq (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - cvttpd2pi %xmm0, 
%mm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - cvttpd2pi (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - cvttps2dq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - cvttps2dq (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - cvttsd2si %xmm0, %ecx +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - cvttsd2si %xmm0, %rcx +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - cvttsd2si (%rax), %ecx +# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - cvttsd2si (%rax), %rcx +# CHECK-NEXT: - - - - - - - - - 5.00 - - - - - - - - - - - - - divpd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 5.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - divpd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 5.00 - - - - - - - - - - - - - divsd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 5.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - divsd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - - - - 10.00 10.00 10.00 - - - - - lfence +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 maskmovdqu %xmm0, %xmm1 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - maxpd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - maxpd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - maxsd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - maxsd (%rax), %xmm2 +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - mfence +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - minpd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - minpd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - 
- - - - - - - - - - minsd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - minsd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - movapd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 movapd %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - movapd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - movd %eax, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - movd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - movd %xmm0, %ecx +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 movd %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - movdqa %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 movdqa %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - movdqa (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - movdqu %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 movdqu %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - movdqu (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - movdq2q %xmm0, %mm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 movhpd %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - movhpd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 movlpd %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - movlpd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - 
- - - 1.00 - - - - - - - - - - - - movmskpd %xmm0, %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 movntil %eax, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 movntiq %rax, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 movntdq %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 movntpd %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - movq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - movq %rax, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - movq (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - movq %xmm0, %rcx +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 movq %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - - - - - - - - - movq2dq %mm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - movsd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 movsd %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - movsd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - movupd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 movupd %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - movupd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - mulpd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - mulpd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - mulsd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 
0.33 0.33 0.33 0.33 - - mulsd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - orpd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - orpd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - packssdw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - packssdw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - packsswb %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - packsswb (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - packuswb %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - packuswb (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - paddb %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - paddb (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - paddd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - paddd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - paddq %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - paddq (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - paddq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - paddq (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - paddsb %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - paddsb (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - paddsw %xmm0, %xmm2 +# CHECK-NEXT: 
- - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - paddsw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - paddusb %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - paddusb (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - paddusw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - paddusw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - paddw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - paddw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - pand %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pand (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - pandn %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pandn (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - pavgb %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pavgb (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - pavgw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pavgw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - pcmpeqb %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pcmpeqb (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - pcmpeqd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pcmpeqd (%rax), %xmm2 +# CHECK-NEXT: - - - - - 
- - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - pcmpeqw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pcmpeqw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - pcmpgtb %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pcmpgtb (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - pcmpgtd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pcmpgtd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - pcmpgtw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pcmpgtw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - pextrw $1, %xmm0, %ecx +# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - pinsrw $1, %eax, %xmm0 +# CHECK-NEXT: - - - - - - - - - - - - 1.50 1.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pinsrw $1, (%rax), %xmm0 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - pmaddwd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmaddwd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - pmaxsw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmaxsw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - pmaxub %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmaxub (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - pminsw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pminsw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 
0.25 - - - - - - - - - - - pminub %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pminub (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - - - - - - pmovmskb %xmm0, %ecx +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - pmulhuw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmulhuw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - pmulhw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmulhw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - pmullw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmullw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - pmuludq %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmuludq (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - pmuludq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmuludq (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - por %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - por (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - psadbw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - psadbw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - pshufd $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pshufd $1, (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - pshufhw $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 
0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pshufhw $1, (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - pshuflw $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pshuflw $1, (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - pslld $1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - pslld %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pslld (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - pslldq $1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - psllq $1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - psllq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - psllq (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - psllw $1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - psllw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - psllw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - psrad $1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - psrad %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - psrad (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - psraw $1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - psraw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - psraw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - psrld $1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - psrld %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 
0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - psrld (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - psrldq $1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - psrlq $1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - psrlq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - psrlq (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - psrlw $1, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - psrlw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - psrlw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - psubb %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - psubb (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - psubd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - psubd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - psubq %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - psubq (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - psubq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - psubq (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - psubsb %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - psubsb (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - psubsw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - psubsw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - 
- - - - - - - - psubusb %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - psubusb (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - psubusw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - psubusw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - psubw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - psubw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - punpckhbw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - punpckhbw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - punpckhdq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - punpckhdq (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - punpckhqdq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - punpckhqdq (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - punpckhwd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - punpckhwd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - punpcklbw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - punpcklbw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - punpckldq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - punpckldq (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - punpcklqdq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - punpcklqdq 
(%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - punpcklwd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - punpcklwd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - pxor %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pxor (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - shufpd $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - shufpd $1, (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 9.00 - - - - - - - - - - - - - sqrtpd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 9.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - sqrtpd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 9.00 - - - - - - - - - - - - - sqrtsd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 9.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - sqrtsd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - subpd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - subpd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - subsd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - subsd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - ucomisd %xmm0, %xmm1 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - ucomisd (%rax), %xmm1 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - unpckhpd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - unpckhpd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - unpcklpd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - 
unpcklpd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - xorpd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - xorpd (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse3.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse3.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse3.s @@ -0,0 +1,119 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -instruction-tables < %s | FileCheck %s + +addsubpd %xmm0, %xmm2 +addsubpd (%rax), %xmm2 + +addsubps %xmm0, %xmm2 +addsubps (%rax), %xmm2 + +haddpd %xmm0, %xmm2 +haddpd (%rax), %xmm2 + +haddps %xmm0, %xmm2 +haddps (%rax), %xmm2 + +hsubpd %xmm0, %xmm2 +hsubpd (%rax), %xmm2 + +hsubps %xmm0, %xmm2 +hsubps (%rax), %xmm2 + +lddqu (%rax), %xmm2 + +monitor + +movddup %xmm0, %xmm2 +movddup (%rax), %xmm2 + +movshdup %xmm0, %xmm2 +movshdup (%rax), %xmm2 + +movsldup %xmm0, %xmm2 +movsldup (%rax), %xmm2 + +mwait + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 0.50 addsubpd %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * addsubpd (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 addsubps %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * addsubps (%rax), %xmm2 +# CHECK-NEXT: 4 6 2.00 haddpd %xmm0, %xmm2 +# CHECK-NEXT: 4 13 2.00 * haddpd (%rax), %xmm2 +# CHECK-NEXT: 4 6 2.00 haddps %xmm0, %xmm2 +# CHECK-NEXT: 4 13 2.00 * haddps (%rax), %xmm2 +# CHECK-NEXT: 4 6 2.00 hsubpd %xmm0, %xmm2 +# CHECK-NEXT: 4 13 2.00 * hsubpd (%rax), %xmm2 +# CHECK-NEXT: 4 6 2.00 hsubps %xmm0, %xmm2 +# CHECK-NEXT: 4 13 2.00 * hsubps (%rax), %xmm2 +# CHECK-NEXT: 1 8 0.50 * lddqu (%rax), %xmm2 +# CHECK-NEXT: 100 
100 25.00 U monitor +# CHECK-NEXT: 1 1 0.50 movddup %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * movddup (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 movshdup %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * movshdup (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 movsldup %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * movsldup (%rax), %xmm2 +# CHECK-NEXT: 100 100 25.00 * * U mwait + +# CHECK: Resources: +# CHECK-NEXT: [0] - Zn3AGU0 +# CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: [8] - Zn3FPP0 +# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: [14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# CHECK-NEXT: [15.0] - Zn3Load +# CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: [16.0] - Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: - - - 50.00 50.00 50.00 50.00 - - 3.00 21.00 2.00 5.00 5.00 - 3.33 3.33 3.33 3.33 3.33 3.33 - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - addsubpd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - addsubpd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - addsubps %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - addsubps (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - 
- - - 2.00 - - - - - - - - - - - - haddpd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 2.00 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - haddpd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - - - - - - haddps %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 2.00 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - haddps (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - - - - - - hsubpd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 2.00 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - hsubpd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - - - - - - hsubps %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 2.00 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - hsubps (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - lddqu (%rax), %xmm2 +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - monitor +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - movddup %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - movddup (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - movshdup %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - movshdup (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - movsldup %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - movsldup (%rax), %xmm2 +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - mwait diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse41.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse41.s @@ -0,0 +1,381 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 
-instruction-tables < %s | FileCheck %s + +blendpd $11, %xmm0, %xmm2 +blendpd $11, (%rax), %xmm2 + +blendps $11, %xmm0, %xmm2 +blendps $11, (%rax), %xmm2 + +blendvpd %xmm0, %xmm2 +blendvpd (%rax), %xmm2 + +blendvps %xmm0, %xmm2 +blendvps (%rax), %xmm2 + +dppd $22, %xmm0, %xmm2 +dppd $22, (%rax), %xmm2 + +dpps $22, %xmm0, %xmm2 +dpps $22, (%rax), %xmm2 + +extractps $1, %xmm0, %rcx +extractps $1, %xmm0, (%rax) + +insertps $1, %xmm0, %xmm2 +insertps $1, (%rax), %xmm2 + +movntdqa (%rax), %xmm2 + +mpsadbw $1, %xmm0, %xmm2 +mpsadbw $1, (%rax), %xmm2 + +packusdw %xmm0, %xmm2 +packusdw (%rax), %xmm2 + +pblendvb %xmm0, %xmm2 +pblendvb (%rax), %xmm2 + +pblendw $11, %xmm0, %xmm2 +pblendw $11, (%rax), %xmm2 + +pcmpeqq %xmm0, %xmm2 +pcmpeqq (%rax), %xmm2 + +pextrb $1, %xmm0, %ecx +pextrb $1, %xmm0, (%rax) + +pextrd $1, %xmm0, %ecx +pextrd $1, %xmm0, (%rax) + +pextrq $1, %xmm0, %rcx +pextrq $1, %xmm0, (%rax) + +pextrw $1, %xmm0, (%rax) + +phminposuw %xmm0, %xmm2 +phminposuw (%rax), %xmm2 + +pinsrb $1, %eax, %xmm1 +pinsrb $1, (%rax), %xmm1 + +pinsrd $1, %eax, %xmm1 +pinsrd $1, (%rax), %xmm1 + +pinsrq $1, %rax, %xmm1 +pinsrq $1, (%rax), %xmm1 + +pmaxsb %xmm0, %xmm2 +pmaxsb (%rax), %xmm2 + +pmaxsd %xmm0, %xmm2 +pmaxsd (%rax), %xmm2 + +pmaxud %xmm0, %xmm2 +pmaxud (%rax), %xmm2 + +pmaxuw %xmm0, %xmm2 +pmaxuw (%rax), %xmm2 + +pminsb %xmm0, %xmm2 +pminsb (%rax), %xmm2 + +pminsd %xmm0, %xmm2 +pminsd (%rax), %xmm2 + +pminud %xmm0, %xmm2 +pminud (%rax), %xmm2 + +pminuw %xmm0, %xmm2 +pminuw (%rax), %xmm2 + +pmovsxbd %xmm0, %xmm2 +pmovsxbd (%rax), %xmm2 + +pmovsxbq %xmm0, %xmm2 +pmovsxbq (%rax), %xmm2 + +pmovsxbw %xmm0, %xmm2 +pmovsxbw (%rax), %xmm2 + +pmovsxdq %xmm0, %xmm2 +pmovsxdq (%rax), %xmm2 + +pmovsxwd %xmm0, %xmm2 +pmovsxwd (%rax), %xmm2 + +pmovsxwq %xmm0, %xmm2 +pmovsxwq (%rax), %xmm2 + +pmovzxbd %xmm0, %xmm2 +pmovzxbd (%rax), %xmm2 + +pmovzxbq %xmm0, %xmm2 +pmovzxbq (%rax), %xmm2 + +pmovzxbw %xmm0, %xmm2 +pmovzxbw (%rax), %xmm2 + +pmovzxdq %xmm0, %xmm2 +pmovzxdq (%rax), %xmm2 + 
+pmovzxwd %xmm0, %xmm2 +pmovzxwd (%rax), %xmm2 + +pmovzxwq %xmm0, %xmm2 +pmovzxwq (%rax), %xmm2 + +pmuldq %xmm0, %xmm2 +pmuldq (%rax), %xmm2 + +pmulld %xmm0, %xmm2 +pmulld (%rax), %xmm2 + +ptest %xmm0, %xmm1 +ptest (%rax), %xmm1 + +roundpd $1, %xmm0, %xmm2 +roundpd $1, (%rax), %xmm2 + +roundps $1, %xmm0, %xmm2 +roundps $1, (%rax), %xmm2 + +roundsd $1, %xmm0, %xmm2 +roundsd $1, (%rax), %xmm2 + +roundss $1, %xmm0, %xmm2 +roundss $1, (%rax), %xmm2 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 0.50 blendpd $11, %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * blendpd $11, (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 blendps $11, %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * blendps $11, (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 blendvpd %xmm0, %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * blendvpd %xmm0, (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 blendvps %xmm0, %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * blendvps %xmm0, (%rax), %xmm2 +# CHECK-NEXT: 3 9 3.00 dppd $22, %xmm0, %xmm2 +# CHECK-NEXT: 5 16 3.00 * dppd $22, (%rax), %xmm2 +# CHECK-NEXT: 8 15 4.00 dpps $22, %xmm0, %xmm2 +# CHECK-NEXT: 10 22 4.00 * dpps $22, (%rax), %xmm2 +# CHECK-NEXT: 2 1 1.00 extractps $1, %xmm0, %ecx +# CHECK-NEXT: 2 2 1.00 * extractps $1, %xmm0, (%rax) +# CHECK-NEXT: 1 1 0.50 insertps $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * insertps $1, (%rax), %xmm2 +# CHECK-NEXT: 1 8 0.50 * movntdqa (%rax), %xmm2 +# CHECK-NEXT: 4 4 2.00 mpsadbw $1, %xmm0, %xmm2 +# CHECK-NEXT: 6 11 2.00 * mpsadbw $1, (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 packusdw %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * packusdw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pblendvb %xmm0, %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pblendvb %xmm0, (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 pblendw $11, %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pblendw $11, (%rax), %xmm2 
+# CHECK-NEXT: 1 1 0.25 pcmpeqq %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pcmpeqq (%rax), %xmm2 +# CHECK-NEXT: 2 1 1.00 pextrb $1, %xmm0, %ecx +# CHECK-NEXT: 2 2 1.00 * pextrb $1, %xmm0, (%rax) +# CHECK-NEXT: 2 1 1.00 pextrd $1, %xmm0, %ecx +# CHECK-NEXT: 2 2 1.00 * pextrd $1, %xmm0, (%rax) +# CHECK-NEXT: 2 1 1.00 pextrq $1, %xmm0, %rcx +# CHECK-NEXT: 2 2 1.00 * pextrq $1, %xmm0, (%rax) +# CHECK-NEXT: 2 2 1.00 * pextrw $1, %xmm0, (%rax) +# CHECK-NEXT: 1 3 0.50 phminposuw %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * phminposuw (%rax), %xmm2 +# CHECK-NEXT: 2 2 1.00 pinsrb $1, %eax, %xmm1 +# CHECK-NEXT: 1 8 1.50 * pinsrb $1, (%rax), %xmm1 +# CHECK-NEXT: 2 2 1.00 pinsrd $1, %eax, %xmm1 +# CHECK-NEXT: 1 8 1.50 * pinsrd $1, (%rax), %xmm1 +# CHECK-NEXT: 2 2 1.00 pinsrq $1, %rax, %xmm1 +# CHECK-NEXT: 1 8 1.50 * pinsrq $1, (%rax), %xmm1 +# CHECK-NEXT: 1 1 0.25 pmaxsb %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pmaxsb (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 pmaxsd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pmaxsd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 pmaxud %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pmaxud (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 pmaxuw %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pmaxuw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 pminsb %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pminsb (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 pminsd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pminsd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 pminud %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pminud (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 pminuw %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pminuw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pmovsxbd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pmovsxbd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pmovsxbq %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pmovsxbq (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pmovsxbw %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pmovsxbw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pmovsxdq %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pmovsxdq (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pmovsxwd %xmm0, %xmm2 +# 
CHECK-NEXT: 1 8 0.50 * pmovsxwd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pmovsxwq %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pmovsxwq (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pmovzxbd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pmovzxbd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pmovzxbq %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pmovzxbq (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pmovzxbw %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pmovzxbw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pmovzxdq %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pmovzxdq (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pmovzxwd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pmovzxwd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pmovzxwq %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pmovzxwq (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 pmuldq %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * pmuldq (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 pmulld %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * pmulld (%rax), %xmm2 +# CHECK-NEXT: 2 1 1.00 ptest %xmm0, %xmm1 +# CHECK-NEXT: 2 8 1.00 * ptest (%rax), %xmm1 +# CHECK-NEXT: 1 3 0.50 roundpd $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * roundpd $1, (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 roundps $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * roundps $1, (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 roundsd $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * roundsd $1, (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 roundss $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * roundss $1, (%rax), %xmm2 + +# CHECK: Resources: +# CHECK-NEXT: [0] - Zn3AGU0 +# CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: [8] - Zn3FPP0 +# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: [14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# CHECK-NEXT: [15.0] - Zn3Load +# 
CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: [16.0] - Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: - - - - - - - - 31.00 43.00 28.00 16.00 35.50 35.50 7.00 16.33 16.33 16.33 14.67 14.67 14.67 2.50 2.50 + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - blendpd $11, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - blendpd $11, (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - blendps $11, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - blendps $11, (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - blendvpd %xmm0, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - blendvpd %xmm0, (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - blendvps %xmm0, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - blendvps %xmm0, (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - - - - - - - - - - - - dppd $22, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - dppd $22, (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - - - - - - - - - - - - dpps $22, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - dpps $22, (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - extractps $1, %xmm0, %ecx +# 
CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 extractps $1, %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - insertps $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - insertps $1, (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - movntdqa (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 2.00 2.00 - - - - - - - - - - - mpsadbw $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 2.00 2.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - mpsadbw $1, (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - packusdw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - packusdw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - pblendvb %xmm0, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pblendvb %xmm0, (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - pblendw $11, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pblendw $11, (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - pcmpeqq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pcmpeqq (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - pextrb $1, %xmm0, %ecx +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 pextrb $1, %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - pextrd $1, %xmm0, %ecx +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 pextrd $1, %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - pextrq $1, %xmm0, %rcx +# 
CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 pextrq $1, %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 pextrw $1, %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - phminposuw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - phminposuw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - pinsrb $1, %eax, %xmm1 +# CHECK-NEXT: - - - - - - - - - - - - 1.50 1.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pinsrb $1, (%rax), %xmm1 +# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - pinsrd $1, %eax, %xmm1 +# CHECK-NEXT: - - - - - - - - - - - - 1.50 1.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pinsrd $1, (%rax), %xmm1 +# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - pinsrq $1, %rax, %xmm1 +# CHECK-NEXT: - - - - - - - - - - - - 1.50 1.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pinsrq $1, (%rax), %xmm1 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - pmaxsb %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmaxsb (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - pmaxsd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmaxsd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - pmaxud %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmaxud (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - pmaxuw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmaxuw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - pminsb %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 
0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pminsb (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - pminsd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pminsd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - pminud %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pminud (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - pminuw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pminuw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - pmovsxbd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmovsxbd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - pmovsxbq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmovsxbq (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - pmovsxbw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmovsxbw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - pmovsxdq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmovsxdq (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - pmovsxwd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmovsxwd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - pmovsxwq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmovsxwq (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - 
pmovzxbd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmovzxbd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - pmovzxbq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmovzxbq (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - pmovzxbw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmovzxbw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - pmovzxdq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmovzxdq (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - pmovzxwd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmovzxwd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - pmovzxwq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmovzxwq (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - pmuldq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmuldq (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - pmulld %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmulld (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 1.00 - - - - - - - - ptest %xmm0, %xmm1 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 1.00 1.00 1.00 0.33 0.33 0.33 0.33 0.33 0.33 - - ptest (%rax), %xmm1 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - roundpd $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - roundpd $1, (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 
- 0.50 0.50 - - - - - - - - - - - roundps $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - roundps $1, (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - roundsd $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - roundsd $1, (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - roundss $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - roundss $1, (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse42.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse42.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse42.s @@ -0,0 +1,114 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -instruction-tables < %s | FileCheck %s + +crc32b %al, %ecx +crc32b (%rax), %ecx + +crc32l %eax, %ecx +crc32l (%rax), %ecx + +crc32w %ax, %ecx +crc32w (%rax), %ecx + +crc32b %al, %rcx +crc32b (%rax), %rcx + +crc32q %rax, %rcx +crc32q (%rax), %rcx + +pcmpestri $1, %xmm0, %xmm2 +pcmpestri $1, (%rax), %xmm2 + +pcmpestrm $1, %xmm0, %xmm2 +pcmpestrm $1, (%rax), %xmm2 + +pcmpistri $1, %xmm0, %xmm2 +pcmpistri $1, (%rax), %xmm2 + +pcmpistrm $1, %xmm0, %xmm2 +pcmpistrm $1, (%rax), %xmm2 + +pcmpgtq %xmm0, %xmm2 +pcmpgtq (%rax), %xmm2 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 1.00 crc32b %al, %ecx +# CHECK-NEXT: 1 7 1.00 * crc32b (%rax), %ecx +# CHECK-NEXT: 1 3 1.00 crc32l %eax, %ecx +# CHECK-NEXT: 1 7 1.00 * crc32l (%rax), %ecx +# CHECK-NEXT: 1 3 1.00 crc32w %ax, %ecx +# CHECK-NEXT: 1 7 
1.00 * crc32w (%rax), %ecx +# CHECK-NEXT: 1 3 1.00 crc32b %al, %rcx +# CHECK-NEXT: 1 7 1.00 * crc32b (%rax), %rcx +# CHECK-NEXT: 1 3 1.00 crc32q %rax, %rcx +# CHECK-NEXT: 1 7 1.00 * crc32q (%rax), %rcx +# CHECK-NEXT: 8 6 3.00 pcmpestri $1, %xmm0, %xmm2 +# CHECK-NEXT: 12 13 3.00 * pcmpestri $1, (%rax), %xmm2 +# CHECK-NEXT: 7 6 3.00 pcmpestrm $1, %xmm0, %xmm2 +# CHECK-NEXT: 12 13 3.00 * pcmpestrm $1, (%rax), %xmm2 +# CHECK-NEXT: 4 2 2.00 pcmpistri $1, %xmm0, %xmm2 +# CHECK-NEXT: 4 9 2.00 * pcmpistri $1, (%rax), %xmm2 +# CHECK-NEXT: 3 6 2.00 pcmpistrm $1, %xmm0, %xmm2 +# CHECK-NEXT: 4 13 2.00 * pcmpistrm $1, (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.25 pcmpgtq %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pcmpgtq (%rax), %xmm2 + +# CHECK: Resources: +# CHECK-NEXT: [0] - Zn3AGU0 +# CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: [8] - Zn3FPP0 +# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: [14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# CHECK-NEXT: [15.0] - Zn3Load +# CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: [16.0] - Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: 1.67 1.67 1.67 - 10.00 - - - 20.50 20.50 20.50 20.50 2.50 2.50 - 3.33 3.33 3.33 3.33 3.33 3.33 - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - - - 
- - - - crc32b %al, %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - 1.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - crc32b (%rax), %ecx +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - - - - - - - crc32l %eax, %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - 1.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - crc32l (%rax), %ecx +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - - - - - - - crc32w %ax, %ecx +# CHECK-NEXT: 0.33 0.33 0.33 - 1.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - crc32w (%rax), %ecx +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - - - - - - - crc32b %al, %rcx +# CHECK-NEXT: 0.33 0.33 0.33 - 1.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - crc32b (%rax), %rcx +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - - - - - - - crc32q %rax, %rcx +# CHECK-NEXT: 0.33 0.33 0.33 - 1.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - crc32q (%rax), %rcx +# CHECK-NEXT: - - - - - - - - 3.00 3.00 3.00 3.00 - - - - - - - - - - - pcmpestri $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 3.00 3.00 3.00 3.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pcmpestri $1, (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 3.00 3.00 3.00 3.00 - - - - - - - - - - - pcmpestrm $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 3.00 3.00 3.00 3.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pcmpestrm $1, (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 2.00 2.00 - - - - - - - - - - - pcmpistri $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 2.00 2.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pcmpistri $1, (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 2.00 2.00 - - - - - - - - - - - pcmpistrm $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 2.00 2.00 2.00 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pcmpistrm $1, (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - pcmpgtq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - 
pcmpgtq (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse4a.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse4a.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse4a.s @@ -0,0 +1,65 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -instruction-tables < %s | FileCheck %s + +extrq %xmm0, %xmm2 +extrq $22, $2, %xmm2 + +insertq %xmm0, %xmm2 +insertq $22, $22, %xmm0, %xmm2 + +movntsd %xmm0, (%rax) +movntss %xmm0, (%rax) + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 0.50 extrq %xmm0, %xmm2 +# CHECK-NEXT: 2 3 0.50 extrq $22, $2, %xmm2 +# CHECK-NEXT: 1 3 0.50 insertq %xmm0, %xmm2 +# CHECK-NEXT: 2 3 0.50 insertq $22, $22, %xmm0, %xmm2 +# CHECK-NEXT: 1 1 1.00 * movntsd %xmm0, (%rax) +# CHECK-NEXT: 1 1 1.00 * movntss %xmm0, (%rax) + +# CHECK: Resources: +# CHECK-NEXT: [0] - Zn3AGU0 +# CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: [8] - Zn3FPP0 +# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: [14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# CHECK-NEXT: [15.0] - Zn3Load +# CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: [16.0] - Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] 
[14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - 3.00 3.00 2.00 0.67 0.67 0.67 - - - 1.00 1.00 + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - - - - - - - - - extrq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - - - - - - - - - extrq $22, $2, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - - - - - - - - - insertq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - - - - - - - - - insertq $22, $22, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 movntsd %xmm0, (%rax) +# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 1.00 0.33 0.33 0.33 - - - 0.50 0.50 movntss %xmm0, (%rax) diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-ssse3.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-ssse3.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-ssse3.s @@ -0,0 +1,268 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -instruction-tables < %s | FileCheck %s + +pabsb %mm0, %mm2 +pabsb (%rax), %mm2 + +pabsb %xmm0, %xmm2 +pabsb (%rax), %xmm2 + +pabsd %mm0, %mm2 +pabsd (%rax), %mm2 + +pabsd %xmm0, %xmm2 +pabsd (%rax), %xmm2 + +pabsw %mm0, %mm2 +pabsw (%rax), %mm2 + +pabsw %xmm0, %xmm2 +pabsw (%rax), %xmm2 + +palignr $1, %mm0, %mm2 +palignr $1, (%rax), %mm2 + +palignr $1, %xmm0, %xmm2 +palignr $1, (%rax), %xmm2 + +phaddd %mm0, %mm2 +phaddd (%rax), %mm2 + +phaddd %xmm0, %xmm2 +phaddd (%rax), %xmm2 + +phaddsw %mm0, %mm2 +phaddsw (%rax), %mm2 + +phaddsw %xmm0, %xmm2 +phaddsw (%rax), %xmm2 + +phaddw %mm0, %mm2 +phaddw (%rax), %mm2 + +phaddw %xmm0, %xmm2 +phaddw (%rax), %xmm2 + +phsubd %mm0, %mm2 +phsubd 
(%rax), %mm2 + +phsubd %xmm0, %xmm2 +phsubd (%rax), %xmm2 + +phsubsw %mm0, %mm2 +phsubsw (%rax), %mm2 + +phsubsw %xmm0, %xmm2 +phsubsw (%rax), %xmm2 + +phsubw %mm0, %mm2 +phsubw (%rax), %mm2 + +phsubw %xmm0, %xmm2 +phsubw (%rax), %xmm2 + +pmaddubsw %mm0, %mm2 +pmaddubsw (%rax), %mm2 + +pmaddubsw %xmm0, %xmm2 +pmaddubsw (%rax), %xmm2 + +pmulhrsw %mm0, %mm2 +pmulhrsw (%rax), %mm2 + +pmulhrsw %xmm0, %xmm2 +pmulhrsw (%rax), %xmm2 + +pshufb %mm0, %mm2 +pshufb (%rax), %mm2 + +pshufb %xmm0, %xmm2 +pshufb (%rax), %xmm2 + +psignb %mm0, %mm2 +psignb (%rax), %mm2 + +psignb %xmm0, %xmm2 +psignb (%rax), %xmm2 + +psignd %mm0, %mm2 +psignd (%rax), %mm2 + +psignd %xmm0, %xmm2 +psignd (%rax), %xmm2 + +psignw %mm0, %mm2 +psignw (%rax), %mm2 + +psignw %xmm0, %xmm2 +psignw (%rax), %xmm2 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 0.50 pabsb %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * pabsb (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 pabsb %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pabsb (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pabsd %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * pabsd (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 pabsd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pabsd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pabsw %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * pabsw (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 pabsw %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pabsw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 palignr $1, %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * palignr $1, (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 palignr $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * palignr $1, (%rax), %xmm2 +# CHECK-NEXT: 3 2 2.00 phaddd %mm0, %mm2 +# CHECK-NEXT: 4 9 2.00 * phaddd (%rax), %mm2 +# CHECK-NEXT: 4 2 2.00 phaddd %xmm0, %xmm2 +# CHECK-NEXT: 4 9 2.00 * phaddd (%rax), %xmm2 +# CHECK-NEXT: 3 2 2.00 phaddsw %mm0, %mm2 +# 
CHECK-NEXT: 4 9 2.00 * phaddsw (%rax), %mm2 +# CHECK-NEXT: 4 2 2.00 phaddsw %xmm0, %xmm2 +# CHECK-NEXT: 4 9 2.00 * phaddsw (%rax), %xmm2 +# CHECK-NEXT: 3 2 2.00 phaddw %mm0, %mm2 +# CHECK-NEXT: 4 9 2.00 * phaddw (%rax), %mm2 +# CHECK-NEXT: 4 2 2.00 phaddw %xmm0, %xmm2 +# CHECK-NEXT: 4 9 2.00 * phaddw (%rax), %xmm2 +# CHECK-NEXT: 3 2 2.00 phsubd %mm0, %mm2 +# CHECK-NEXT: 4 9 2.00 * phsubd (%rax), %mm2 +# CHECK-NEXT: 4 2 2.00 phsubd %xmm0, %xmm2 +# CHECK-NEXT: 4 9 2.00 * phsubd (%rax), %xmm2 +# CHECK-NEXT: 3 2 2.00 phsubsw %mm0, %mm2 +# CHECK-NEXT: 4 9 2.00 * phsubsw (%rax), %mm2 +# CHECK-NEXT: 4 2 2.00 phsubsw %xmm0, %xmm2 +# CHECK-NEXT: 4 9 2.00 * phsubsw (%rax), %xmm2 +# CHECK-NEXT: 3 2 2.00 phsubw %mm0, %mm2 +# CHECK-NEXT: 4 9 2.00 * phsubw (%rax), %mm2 +# CHECK-NEXT: 4 2 2.00 phsubw %xmm0, %xmm2 +# CHECK-NEXT: 4 9 2.00 * phsubw (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 pmaddubsw %mm0, %mm2 +# CHECK-NEXT: 1 10 0.50 * pmaddubsw (%rax), %mm2 +# CHECK-NEXT: 1 3 0.50 pmaddubsw %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * pmaddubsw (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.50 pmulhrsw %mm0, %mm2 +# CHECK-NEXT: 1 10 0.50 * pmulhrsw (%rax), %mm2 +# CHECK-NEXT: 1 3 0.50 pmulhrsw %xmm0, %xmm2 +# CHECK-NEXT: 1 10 0.50 * pmulhrsw (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 pshufb %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * pshufb (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 pshufb %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * pshufb (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 psignb %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * psignb (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 psignb %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * psignb (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 psignd %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * psignd (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 psignd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * psignd (%rax), %xmm2 +# CHECK-NEXT: 1 1 0.50 psignw %mm0, %mm2 +# CHECK-NEXT: 1 8 0.50 * psignw (%rax), %mm2 +# CHECK-NEXT: 1 1 0.50 psignw %xmm0, %xmm2 +# CHECK-NEXT: 1 8 0.50 * psignw (%rax), %xmm2 + +# CHECK: Resources: +# CHECK-NEXT: [0] - 
Zn3AGU0 +# CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: [8] - Zn3FPP0 +# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: [14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# CHECK-NEXT: [15.0] - Zn3Load +# CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: [16.0] - Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: - - - - - - - - 61.00 13.00 7.00 7.00 16.00 16.00 - 10.67 10.67 10.67 10.67 10.67 10.67 - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - pabsb %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pabsb (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - pabsb %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pabsb (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - pabsd %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pabsd (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - pabsd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pabsd (%rax), %xmm2 +# CHECK-NEXT: - - - 
- - - - - 0.50 0.50 - - - - - - - - - - - - - pabsw %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pabsw (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - pabsw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pabsw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - palignr $1, %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - palignr $1, (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - palignr $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - palignr $1, (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - phaddd %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - phaddd (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - phaddd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - phaddd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - phaddsw %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - phaddsw (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - phaddsw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - phaddsw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - phaddw %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - phaddw (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - phaddw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - phaddw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - 
- - - - - - - phsubd %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - phsubd (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - phsubd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - phsubd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - phsubsw %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - phsubsw (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - phsubsw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - phsubsw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - phsubw %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - phsubw (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - phsubw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 2.00 - - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - phsubw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - pmaddubsw %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmaddubsw (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - pmaddubsw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmaddubsw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - pmulhrsw %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmulhrsw (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - pmulhrsw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pmulhrsw (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - pshufb %mm0, %mm2 
+# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pshufb (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - pshufb %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pshufb (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - psignb %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - psignb (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - psignb %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - psignb (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - psignd %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - psignd (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - psignd %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - psignd (%rax), %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - psignw %mm0, %mm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - psignw (%rax), %mm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - psignw %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - psignw (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-x86_32.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-x86_32.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-x86_32.s @@ -0,0 +1,93 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=i686-unknown-unknown -mcpu=znver3 -instruction-tables < %s | FileCheck %s + +aaa + +aad +aad $7 + +aam +aam $7 + +aas + 
+bound %bx, (%eax) +bound %ebx, (%eax) + +daa + +das + +into + +leave + +salc + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 100 100 25.00 aaa +# CHECK-NEXT: 100 100 25.00 aad +# CHECK-NEXT: 100 100 25.00 aad $7 +# CHECK-NEXT: 100 100 25.00 aam +# CHECK-NEXT: 100 100 25.00 aam $7 +# CHECK-NEXT: 100 100 25.00 aas +# CHECK-NEXT: 100 100 25.00 U bound %bx, (%eax) +# CHECK-NEXT: 100 100 25.00 U bound %ebx, (%eax) +# CHECK-NEXT: 100 100 25.00 daa +# CHECK-NEXT: 100 100 25.00 das +# CHECK-NEXT: 100 100 25.00 U into +# CHECK-NEXT: 1 1 0.25 * leave +# CHECK-NEXT: 1 1 0.25 U salc + +# CHECK: Resources: +# CHECK-NEXT: [0] - Zn3AGU0 +# CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: [8] - Zn3FPP0 +# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: [14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# CHECK-NEXT: [15.0] - Zn3Load +# CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: [16.0] - Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: - - - 275.50 275.50 275.50 275.50 - - - - - - - - - - - - - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: 
+# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - aaa +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - aad +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - aad $7 +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - aam +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - aam $7 +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - aas +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - bound %bx, (%eax) +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - bound %ebx, (%eax) +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - daa +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - das +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - into +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leave +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - salc diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-x86_64.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-x86_64.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-x86_64.s @@ -0,0 +1,2491 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -instruction-tables < %s | FileCheck %s + +adcb $0, %al +adcb $0, %dil +adcb $0, (%rax) +adcb $7, %al +adcb $7, %dil +adcb $7, (%rax) +adcb %sil, %dil +adcb %sil, (%rax) +adcb (%rax), %dil + +adcw $0, %ax +adcw $0, %di +adcw $0, (%rax) +adcw $511, %ax +adcw $511, %di +adcw $511, (%rax) +adcw $7, %di +adcw $7, (%rax) +adcw %si, %di +adcw %si, (%rax) +adcw (%rax), %di + +adcl $0, %eax +adcl $0, %edi +adcl $0, (%rax) +adcl $665536, %eax +adcl $665536, %edi +adcl $665536, (%rax) +adcl $7, %edi +adcl $7, (%rax) +adcl %esi, %edi +adcl 
%esi, (%rax) +adcl (%rax), %edi + +adcq $0, %rax +adcq $0, %rdi +adcq $0, (%rax) +adcq $665536, %rax +adcq $665536, %rdi +adcq $665536, (%rax) +adcq $7, %rdi +adcq $7, (%rax) +adcq %rsi, %rdi +adcq %rsi, (%rax) +adcq (%rax), %rdi + +addb $7, %al +addb $7, %dil +addb $7, (%rax) +addb %sil, %dil +addb %sil, (%rax) +addb (%rax), %dil + +addw $511, %ax +addw $511, %di +addw $511, (%rax) +addw $7, %di +addw $7, (%rax) +addw %si, %di +addw %si, (%rax) +addw (%rax), %di + +addl $665536, %eax +addl $665536, %edi +addl $665536, (%rax) +addl $7, %edi +addl $7, (%rax) +addl %esi, %edi +addl %esi, (%rax) +addl (%rax), %edi + +addq $665536, %rax +addq $665536, %rdi +addq $665536, (%rax) +addq $7, %rdi +addq $7, (%rax) +addq %rsi, %rdi +addq %rsi, (%rax) +addq (%rax), %rdi + +andb $7, %al +andb $7, %dil +andb $7, (%rax) +andb %sil, %dil +andb %sil, (%rax) +andb (%rax), %dil + +andw $511, %ax +andw $511, %di +andw $511, (%rax) +andw $7, %di +andw $7, (%rax) +andw %si, %di +andw %si, (%rax) +andw (%rax), %di + +andl $665536, %eax +andl $665536, %edi +andl $665536, (%rax) +andl $7, %edi +andl $7, (%rax) +andl %esi, %edi +andl %esi, (%rax) +andl (%rax), %edi + +andq $665536, %rax +andq $665536, %rdi +andq $665536, (%rax) +andq $7, %rdi +andq $7, (%rax) +andq %rsi, %rdi +andq %rsi, (%rax) +andq (%rax), %rdi + +bsfw %si, %di +bsrw %si, %di +bsfw (%rax), %di +bsrw (%rax), %di + +bsfl %esi, %edi +bsrl %esi, %edi +bsfl (%rax), %edi +bsrl (%rax), %edi + +bsfq %rsi, %rdi +bsrq %rsi, %rdi +bsfq (%rax), %rdi +bsrq (%rax), %rdi + +bswap %eax +bswap %rax + +btw %si, %di +btcw %si, %di +btrw %si, %di +btsw %si, %di +btw %si, (%rax) +btcw %si, (%rax) +btrw %si, (%rax) +btsw %si, (%rax) +btw $7, %di +btcw $7, %di +btrw $7, %di +btsw $7, %di +btw $7, (%rax) +btcw $7, (%rax) +btrw $7, (%rax) +btsw $7, (%rax) + +btl %esi, %edi +btcl %esi, %edi +btrl %esi, %edi +btsl %esi, %edi +btl %esi, (%rax) +btcl %esi, (%rax) +btrl %esi, (%rax) +btsl %esi, (%rax) +btl $7, %edi +btcl $7, %edi +btrl $7, %edi +btsl 
$7, %edi +btl $7, (%rax) +btcl $7, (%rax) +btrl $7, (%rax) +btsl $7, (%rax) + +btq %rsi, %rdi +btcq %rsi, %rdi +btrq %rsi, %rdi +btsq %rsi, %rdi +btq %rsi, (%rax) +btcq %rsi, (%rax) +btrq %rsi, (%rax) +btsq %rsi, (%rax) +btq $7, %rdi +btcq $7, %rdi +btrq $7, %rdi +btsq $7, %rdi +btq $7, (%rax) +btcq $7, (%rax) +btrq $7, (%rax) +btsq $7, (%rax) + +cbw +cwde +cdqe +cwd +cdq +cqo + +clc +cld +cmc + +cmpb $7, %al +cmpb $7, %dil +cmpb $7, (%rax) +cmpb %sil, %dil +cmpb %sil, (%rax) +cmpb (%rax), %dil + +cmpw $511, %ax +cmpw $511, %di +cmpw $511, (%rax) +cmpw $7, %di +cmpw $7, (%rax) +cmpw %si, %di +cmpw %si, (%rax) +cmpw (%rax), %di + +cmpl $665536, %eax +cmpl $665536, %edi +cmpl $665536, (%rax) +cmpl $7, %edi +cmpl $7, (%rax) +cmpl %esi, %edi +cmpl %esi, (%rax) +cmpl (%rax), %edi + +cmpq $665536, %rax +cmpq $665536, %rdi +cmpq $665536, (%rax) +cmpq $7, %rdi +cmpq $7, (%rax) +cmpq %rsi, %rdi +cmpq %rsi, (%rax) +cmpq (%rax), %rdi + +cmpsb +cmpsw +cmpsl +cmpsq + +cmpxchgb %cl, %bl +cmpxchgb %cl, (%rbx) + +cmpxchgw %cx, %bx +cmpxchgw %cx, (%rbx) + +cmpxchgl %ecx, %ebx +cmpxchgl %ecx, (%rbx) + +cmpxchgq %rcx, %rbx +cmpxchgq %rcx, (%rbx) + +cpuid + +decb %dil +decb (%rax) +decw %di +decw (%rax) +decl %edi +decl (%rax) +decq %rdi +decq (%rax) + +divb %dil +divb (%rax) +divw %si +divw (%rax) +divl %edx +divl (%rax) +divq %rcx +divq (%rax) + +enter $7, $4095 + +idivb %dil +idivb (%rax) +idivw %si +idivw (%rax) +idivl %edx +idivl (%rax) +idivq %rcx +idivq (%rax) + +imulb %dil +imulb (%rax) + +imulw %di +imulw (%rax) +imulw %si, %di +imulw (%rax), %di +imulw $511, %si, %di +imulw $511, (%rax), %di +imulw $7, %si, %di +imulw $7, (%rax), %di + +imull %edi +imull (%rax) +imull %esi, %edi +imull (%rax), %edi +imull $665536, %esi, %edi +imull $665536, (%rax), %edi +imull $7, %esi, %edi +imull $7, (%rax), %edi + +imulq %rdi +imulq (%rax) +imulq %rsi, %rdi +imulq (%rax), %rdi +imulq $665536, %rsi, %rdi +imulq $665536, (%rax), %rdi +imulq $7, %rsi, %rdi +imulq $7, (%rax), %rdi + +inb $7, 
%al +inb %dx, %al +inw $7, %ax +inw %dx, %ax +inl $7, %eax +inl %dx, %eax + +incb %dil +incb (%rax) +incw %di +incw (%rax) +incl %edi +incl (%rax) +incq %rdi +incq (%rax) + +insb +insw +insl + +int $7 + +invlpg (%rax) +invlpga %rax, %ecx + +lahf + +leave + +lodsb +lodsw +lodsl +lodsq + +movsb +movsw +movsl +movsq + +movsbw %al, %di +movzbw %al, %di +movsbw (%rax), %di +movzbw (%rax), %di +movsbl %al, %edi +movzbl %al, %edi +movsbl (%rax), %edi +movzbl (%rax), %edi +movsbq %al, %rdi +movzbq %al, %rdi +movsbq (%rax), %rdi +movzbq (%rax), %rdi + +movswl %ax, %edi +movzwl %ax, %edi +movswl (%rax), %edi +movzwl (%rax), %edi +movswq %ax, %rdi +movzwq %ax, %rdi +movswq (%rax), %rdi +movzwq (%rax), %rdi + +movslq %eax, %rdi +movslq (%rax), %rdi + +mulb %dil +mulb (%rax) +mulw %si +mulw (%rax) +mull %edx +mull (%rax) +mulq %rcx +mulq (%rax) + +negb %dil +negb (%r8) +negw %si +negw (%r9) +negl %edx +negl (%rax) +negq %rcx +negq (%r10) + +nop +nopw %di +nopw (%rcx) +nopl %esi +nopl (%r8) +nopq %rdx +nopq (%r9) + +notb %dil +notb (%r8) +notw %si +notw (%r9) +notl %edx +notl (%rax) +notq %rcx +notq (%r10) + +orb $7, %al +orb $7, %dil +orb $7, (%rax) +orb %sil, %dil +orb %sil, (%rax) +orb (%rax), %dil + +orw $511, %ax +orw $511, %di +orw $511, (%rax) +orw $7, %di +orw $7, (%rax) +orw %si, %di +orw %si, (%rax) +orw (%rax), %di + +orl $665536, %eax +orl $665536, %edi +orl $665536, (%rax) +orl $7, %edi +orl $7, (%rax) +orl %esi, %edi +orl %esi, (%rax) +orl (%rax), %edi + +orq $665536, %rax +orq $665536, %rdi +orq $665536, (%rax) +orq $7, %rdi +orq $7, (%rax) +orq %rsi, %rdi +orq %rsi, (%rax) +orq (%rax), %rdi + +outb %al, $7 +outb %al, %dx +outw %ax, $7 +outw %ax, %dx +outl %eax, $7 +outl %eax, %dx + +outsb +outsw +outsl + +pause + +rclb %dil +rcrb %dil +rclb (%rax) +rcrb (%rax) +rclb $7, %dil +rcrb $7, %dil +rclb $7, (%rax) +rcrb $7, (%rax) +rclb %cl, %dil +rcrb %cl, %dil +rclb %cl, (%rax) +rcrb %cl, (%rax) + +rclw %di +rcrw %di +rclw (%rax) +rcrw (%rax) +rclw $7, %di +rcrw $7, 
%di +rclw $7, (%rax) +rcrw $7, (%rax) +rclw %cl, %di +rcrw %cl, %di +rclw %cl, (%rax) +rcrw %cl, (%rax) + +rcll %edi +rcrl %edi +rcll (%rax) +rcrl (%rax) +rcll $7, %edi +rcrl $7, %edi +rcll $7, (%rax) +rcrl $7, (%rax) +rcll %cl, %edi +rcrl %cl, %edi +rcll %cl, (%rax) +rcrl %cl, (%rax) + +rclq %rdi +rcrq %rdi +rclq (%rax) +rcrq (%rax) +rclq $7, %rdi +rcrq $7, %rdi +rclq $7, (%rax) +rcrq $7, (%rax) +rclq %cl, %rdi +rcrq %cl, %rdi +rclq %cl, (%rax) +rcrq %cl, (%rax) + +rdmsr +rdpmc +rdtsc +rdtscp + +rolb %dil +rorb %dil +rolb (%rax) +rorb (%rax) +rolb $7, %dil +rorb $7, %dil +rolb $7, (%rax) +rorb $7, (%rax) +rolb %cl, %dil +rorb %cl, %dil +rolb %cl, (%rax) +rorb %cl, (%rax) + +rolw %di +rorw %di +rolw (%rax) +rorw (%rax) +rolw $7, %di +rorw $7, %di +rolw $7, (%rax) +rorw $7, (%rax) +rolw %cl, %di +rorw %cl, %di +rolw %cl, (%rax) +rorw %cl, (%rax) + +roll %edi +rorl %edi +roll (%rax) +rorl (%rax) +roll $7, %edi +rorl $7, %edi +roll $7, (%rax) +rorl $7, (%rax) +roll %cl, %edi +rorl %cl, %edi +roll %cl, (%rax) +rorl %cl, (%rax) + +rolq %rdi +rorq %rdi +rolq (%rax) +rorq (%rax) +rolq $7, %rdi +rorq $7, %rdi +rolq $7, (%rax) +rorq $7, (%rax) +rolq %cl, %rdi +rorq %cl, %rdi +rolq %cl, (%rax) +rorq %cl, (%rax) + +sahf + +sarb %dil +shlb %dil +shrb %dil +sarb (%rax) +shlb (%rax) +shrb (%rax) +sarb $7, %dil +shlb $7, %dil +shrb $7, %dil +sarb $7, (%rax) +shlb $7, (%rax) +shrb $7, (%rax) +sarb %cl, %dil +shlb %cl, %dil +shrb %cl, %dil +sarb %cl, (%rax) +shlb %cl, (%rax) +shrb %cl, (%rax) + +sarw %di +shlw %di +shrw %di +sarw (%rax) +shlw (%rax) +shrw (%rax) +sarw $7, %di +shlw $7, %di +shrw $7, %di +sarw $7, (%rax) +shlw $7, (%rax) +shrw $7, (%rax) +sarw %cl, %di +shlw %cl, %di +shrw %cl, %di +sarw %cl, (%rax) +shlw %cl, (%rax) +shrw %cl, (%rax) + +sarl %edi +shll %edi +shrl %edi +sarl (%rax) +shll (%rax) +shrl (%rax) +sarl $7, %edi +shll $7, %edi +shrl $7, %edi +sarl $7, (%rax) +shll $7, (%rax) +shrl $7, (%rax) +sarl %cl, %edi +shll %cl, %edi +shrl %cl, %edi +sarl %cl, (%rax) 
+shll %cl, (%rax) +shrl %cl, (%rax) + +sarq %rdi +shlq %rdi +shrq %rdi +sarq (%rax) +shlq (%rax) +shrq (%rax) +sarq $7, %rdi +shlq $7, %rdi +shrq $7, %rdi +sarq $7, (%rax) +shlq $7, (%rax) +shrq $7, (%rax) +sarq %cl, %rdi +shlq %cl, %rdi +shrq %cl, %rdi +sarq %cl, (%rax) +shlq %cl, (%rax) +shrq %cl, (%rax) + +sbbb $0, %al +sbbb $0, %dil +sbbb $0, (%rax) +sbbb $7, %al +sbbb $7, %dil +sbbb $7, (%rax) +sbbb %sil, %dil +sbbb %sil, (%rax) +sbbb (%rax), %dil + +sbbw $0, %ax +sbbw $0, %di +sbbw $0, (%rax) +sbbw $511, %ax +sbbw $511, %di +sbbw $511, (%rax) +sbbw $7, %di +sbbw $7, (%rax) +sbbw %si, %di +sbbw %si, (%rax) +sbbw (%rax), %di + +sbbl $0, %eax +sbbl $0, %edi +sbbl $0, (%rax) +sbbl $665536, %eax +sbbl $665536, %edi +sbbl $665536, (%rax) +sbbl $7, %edi +sbbl $7, (%rax) +sbbl %esi, %edi +sbbl %esi, (%rax) +sbbl (%rax), %edi + +sbbq $0, %rax +sbbq $0, %rdi +sbbq $0, (%rax) +sbbq $665536, %rax +sbbq $665536, %rdi +sbbq $665536, (%rax) +sbbq $7, %rdi +sbbq $7, (%rax) +sbbq %rsi, %rdi +sbbq %rsi, (%rax) +sbbq (%rax), %rdi + +scasb +scasw +scasl +scasq + +seto %al +seto (%rax) +setno %al +setno (%rax) +setb %al +setb (%rax) +setnb %al +setnb (%rax) +setz %al +setz (%rax) +setnz %al +setnz (%rax) +seta %al +seta (%rax) +setna %al +setna (%rax) +sets %al +sets (%rax) +setns %al +setns (%rax) +setp %al +setp (%rax) +setnp %al +setnp (%rax) +setl %al +setl (%rax) +setnl %al +setnl (%rax) +setg %al +setg (%rax) +setng %al +setng (%rax) + +shldw %cl, %si, %di +shrdw %cl, %si, %di +shldw %cl, %si, (%rax) +shrdw %cl, %si, (%rax) +shldw $7, %si, %di +shrdw $7, %si, %di +shldw $7, %si, (%rax) +shrdw $7, %si, (%rax) + +shldl %cl, %esi, %edi +shrdl %cl, %esi, %edi +shldl %cl, %esi, (%rax) +shrdl %cl, %esi, (%rax) +shldl $7, %esi, %edi +shrdl $7, %esi, %edi +shldl $7, %esi, (%rax) +shrdl $7, %esi, (%rax) + +shldq %cl, %rsi, %rdi +shrdq %cl, %rsi, %rdi +shldq %cl, %rsi, (%rax) +shrdq %cl, %rsi, (%rax) +shldq $7, %rsi, %rdi +shrdq $7, %rsi, %rdi +shldq $7, %rsi, (%rax) +shrdq $7, %rsi, 
(%rax) + +stc +std + +stosb +stosw +stosl +stosq + +subb $7, %al +subb $7, %dil +subb $7, (%rax) +subb %sil, %dil +subb %sil, (%rax) +subb (%rax), %dil + +subw $511, %ax +subw $511, %di +subw $511, (%rax) +subw $7, %di +subw $7, (%rax) +subw %si, %di +subw %si, (%rax) +subw (%rax), %di + +subl $665536, %eax +subl $665536, %edi +subl $665536, (%rax) +subl $7, %edi +subl $7, (%rax) +subl %esi, %edi +subl %esi, (%rax) +subl (%rax), %edi + +subq $665536, %rax +subq $665536, %rdi +subq $665536, (%rax) +subq $7, %rdi +subq $7, (%rax) +subq %rsi, %rdi +subq %rsi, (%rax) +subq (%rax), %rdi + +testb $7, %al +testb $7, %dil +testb $7, (%rax) +testb %sil, %dil +testb %sil, (%rax) + +testw $511, %ax +testw $511, %di +testw $511, (%rax) +testw $7, %di +testw $7, (%rax) +testw %si, %di +testw %si, (%rax) + +testl $665536, %eax +testl $665536, %edi +testl $665536, (%rax) +testl $7, %edi +testl $7, (%rax) +testl %esi, %edi +testl %esi, (%rax) + +testq $665536, %rax +testq $665536, %rdi +testq $665536, (%rax) +testq $7, %rdi +testq $7, (%rax) +testq %rsi, %rdi +testq %rsi, (%rax) + +ud2 + +wrmsr + +xaddb %bl, %cl +xaddb %bl, (%rcx) + +xaddw %bx, %cx +xaddw %ax, (%rbx) + +xaddl %ebx, %ecx +xaddl %eax, (%rbx) + +xaddq %rbx, %rcx +xaddq %rax, (%rbx) + +xchgb %bl, %cl +xchgb %bl, (%rbx) + +xchgw %ax, %bx +xchgw %bx, %cx +xchgw %ax, (%rbx) + +xchgl %eax, %ebx +xchgl %ebx, %ecx +xchgl %eax, (%rbx) + +xchgq %rax, %rbx +xchgq %rbx, %rcx +xchgq %rax, (%rbx) + +xlatb + +xorb $7, %al +xorb $7, %dil +xorb $7, (%rax) +xorb %sil, %dil +xorb %sil, (%rax) +xorb (%rax), %dil + +xorw $511, %ax +xorw $511, %di +xorw $511, (%rax) +xorw $7, %di +xorw $7, (%rax) +xorw %si, %di +xorw %si, (%rax) +xorw (%rax), %di + +xorl $665536, %eax +xorl $665536, %edi +xorl $665536, (%rax) +xorl $7, %edi +xorl $7, (%rax) +xorl %esi, %edi +xorl %esi, (%rax) +xorl (%rax), %edi + +xorq $665536, %rax +xorq $665536, %rdi +xorq $665536, (%rax) +xorq $7, %rdi +xorq $7, (%rax) +xorq %rsi, %rdi +xorq %rsi, (%rax) +xorq (%rax), 
%rdi + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 1.00 adcb $0, %al +# CHECK-NEXT: 1 1 1.00 adcb $0, %dil +# CHECK-NEXT: 1 6 1.00 * * adcb $0, (%rax) +# CHECK-NEXT: 1 1 1.00 adcb $7, %al +# CHECK-NEXT: 1 1 1.00 adcb $7, %dil +# CHECK-NEXT: 1 6 1.00 * * adcb $7, (%rax) +# CHECK-NEXT: 1 1 1.00 adcb %sil, %dil +# CHECK-NEXT: 1 1 1.75 * * adcb %sil, (%rax) +# CHECK-NEXT: 1 5 1.00 * adcb (%rax), %dil +# CHECK-NEXT: 1 1 1.00 adcw $0, %ax +# CHECK-NEXT: 1 1 1.00 adcw $0, %di +# CHECK-NEXT: 1 6 1.00 * * adcw $0, (%rax) +# CHECK-NEXT: 1 1 1.00 adcw $511, %ax +# CHECK-NEXT: 1 1 1.00 adcw $511, %di +# CHECK-NEXT: 1 6 1.00 * * adcw $511, (%rax) +# CHECK-NEXT: 1 1 1.00 adcw $7, %di +# CHECK-NEXT: 1 6 1.00 * * adcw $7, (%rax) +# CHECK-NEXT: 1 1 1.00 adcw %si, %di +# CHECK-NEXT: 1 6 1.00 * * adcw %si, (%rax) +# CHECK-NEXT: 1 5 1.00 * adcw (%rax), %di +# CHECK-NEXT: 1 1 1.00 adcl $0, %eax +# CHECK-NEXT: 1 1 1.00 adcl $0, %edi +# CHECK-NEXT: 1 6 1.00 * * adcl $0, (%rax) +# CHECK-NEXT: 1 1 1.00 adcl $665536, %eax +# CHECK-NEXT: 1 1 1.00 adcl $665536, %edi +# CHECK-NEXT: 1 6 1.00 * * adcl $665536, (%rax) +# CHECK-NEXT: 1 1 1.00 adcl $7, %edi +# CHECK-NEXT: 1 6 1.00 * * adcl $7, (%rax) +# CHECK-NEXT: 1 1 1.00 adcl %esi, %edi +# CHECK-NEXT: 1 6 1.00 * * adcl %esi, (%rax) +# CHECK-NEXT: 1 5 1.00 * adcl (%rax), %edi +# CHECK-NEXT: 1 1 1.00 adcq $0, %rax +# CHECK-NEXT: 1 1 1.00 adcq $0, %rdi +# CHECK-NEXT: 1 6 1.00 * * adcq $0, (%rax) +# CHECK-NEXT: 1 1 1.00 adcq $665536, %rax +# CHECK-NEXT: 1 1 1.00 adcq $665536, %rdi +# CHECK-NEXT: 1 6 1.00 * * adcq $665536, (%rax) +# CHECK-NEXT: 1 1 1.00 adcq $7, %rdi +# CHECK-NEXT: 1 6 1.00 * * adcq $7, (%rax) +# CHECK-NEXT: 1 1 1.00 adcq %rsi, %rdi +# CHECK-NEXT: 1 6 1.00 * * adcq %rsi, (%rax) +# CHECK-NEXT: 1 5 1.00 * 
adcq (%rax), %rdi +# CHECK-NEXT: 1 1 1.00 addb $7, %al +# CHECK-NEXT: 1 1 0.25 addb $7, %dil +# CHECK-NEXT: 1 6 0.67 * * addb $7, (%rax) +# CHECK-NEXT: 1 1 0.25 addb %sil, %dil +# CHECK-NEXT: 1 6 0.67 * * addb %sil, (%rax) +# CHECK-NEXT: 1 5 0.33 * addb (%rax), %dil +# CHECK-NEXT: 1 1 1.00 addw $511, %ax +# CHECK-NEXT: 1 1 0.25 addw $511, %di +# CHECK-NEXT: 1 6 0.67 * * addw $511, (%rax) +# CHECK-NEXT: 1 1 0.25 addw $7, %di +# CHECK-NEXT: 1 6 0.67 * * addw $7, (%rax) +# CHECK-NEXT: 1 1 0.25 addw %si, %di +# CHECK-NEXT: 1 6 0.67 * * addw %si, (%rax) +# CHECK-NEXT: 1 5 0.33 * addw (%rax), %di +# CHECK-NEXT: 1 1 1.00 addl $665536, %eax +# CHECK-NEXT: 1 1 0.25 addl $665536, %edi +# CHECK-NEXT: 1 6 0.67 * * addl $665536, (%rax) +# CHECK-NEXT: 1 1 0.25 addl $7, %edi +# CHECK-NEXT: 1 6 0.67 * * addl $7, (%rax) +# CHECK-NEXT: 1 1 0.25 addl %esi, %edi +# CHECK-NEXT: 1 6 0.67 * * addl %esi, (%rax) +# CHECK-NEXT: 1 5 0.33 * addl (%rax), %edi +# CHECK-NEXT: 1 1 1.00 addq $665536, %rax +# CHECK-NEXT: 1 1 0.25 addq $665536, %rdi +# CHECK-NEXT: 1 6 0.67 * * addq $665536, (%rax) +# CHECK-NEXT: 1 1 0.25 addq $7, %rdi +# CHECK-NEXT: 1 6 0.67 * * addq $7, (%rax) +# CHECK-NEXT: 1 1 0.25 addq %rsi, %rdi +# CHECK-NEXT: 1 6 0.67 * * addq %rsi, (%rax) +# CHECK-NEXT: 1 5 0.33 * addq (%rax), %rdi +# CHECK-NEXT: 1 1 1.00 andb $7, %al +# CHECK-NEXT: 1 1 0.25 andb $7, %dil +# CHECK-NEXT: 1 6 0.67 * * andb $7, (%rax) +# CHECK-NEXT: 1 1 0.25 andb %sil, %dil +# CHECK-NEXT: 1 6 0.67 * * andb %sil, (%rax) +# CHECK-NEXT: 1 5 0.33 * andb (%rax), %dil +# CHECK-NEXT: 1 1 1.00 andw $511, %ax +# CHECK-NEXT: 1 1 0.25 andw $511, %di +# CHECK-NEXT: 1 6 0.67 * * andw $511, (%rax) +# CHECK-NEXT: 1 1 0.25 andw $7, %di +# CHECK-NEXT: 1 6 0.67 * * andw $7, (%rax) +# CHECK-NEXT: 1 1 0.25 andw %si, %di +# CHECK-NEXT: 1 6 0.67 * * andw %si, (%rax) +# CHECK-NEXT: 1 5 0.33 * andw (%rax), %di +# CHECK-NEXT: 1 1 1.00 andl $665536, %eax +# CHECK-NEXT: 1 1 0.25 andl $665536, %edi +# CHECK-NEXT: 1 6 0.67 * * andl $665536, 
(%rax) +# CHECK-NEXT: 1 1 0.25 andl $7, %edi +# CHECK-NEXT: 1 6 0.67 * * andl $7, (%rax) +# CHECK-NEXT: 1 1 0.25 andl %esi, %edi +# CHECK-NEXT: 1 6 0.67 * * andl %esi, (%rax) +# CHECK-NEXT: 1 5 0.33 * andl (%rax), %edi +# CHECK-NEXT: 1 1 1.00 andq $665536, %rax +# CHECK-NEXT: 1 1 0.25 andq $665536, %rdi +# CHECK-NEXT: 1 6 0.67 * * andq $665536, (%rax) +# CHECK-NEXT: 1 1 0.25 andq $7, %rdi +# CHECK-NEXT: 1 6 0.67 * * andq $7, (%rax) +# CHECK-NEXT: 1 1 0.25 andq %rsi, %rdi +# CHECK-NEXT: 1 6 0.67 * * andq %rsi, (%rax) +# CHECK-NEXT: 1 5 0.33 * andq (%rax), %rdi +# CHECK-NEXT: 6 3 3.00 bsfw %si, %di +# CHECK-NEXT: 6 4 4.00 bsrw %si, %di +# CHECK-NEXT: 8 7 3.00 * bsfw (%rax), %di +# CHECK-NEXT: 8 8 4.00 * bsrw (%rax), %di +# CHECK-NEXT: 6 3 3.00 bsfl %esi, %edi +# CHECK-NEXT: 6 4 4.00 bsrl %esi, %edi +# CHECK-NEXT: 8 7 3.00 * bsfl (%rax), %edi +# CHECK-NEXT: 8 8 4.00 * bsrl (%rax), %edi +# CHECK-NEXT: 6 3 3.00 bsfq %rsi, %rdi +# CHECK-NEXT: 6 4 4.00 bsrq %rsi, %rdi +# CHECK-NEXT: 8 7 3.00 * bsfq (%rax), %rdi +# CHECK-NEXT: 8 8 4.00 * bsrq (%rax), %rdi +# CHECK-NEXT: 1 1 0.25 bswapl %eax +# CHECK-NEXT: 1 1 0.25 bswapq %rax +# CHECK-NEXT: 1 1 0.50 btw %si, %di +# CHECK-NEXT: 2 2 1.00 btcw %si, %di +# CHECK-NEXT: 2 2 1.00 btrw %si, %di +# CHECK-NEXT: 2 2 1.00 btsw %si, %di +# CHECK-NEXT: 7 5 0.50 * btw %si, (%rax) +# CHECK-NEXT: 9 7 0.67 * * btcw %si, (%rax) +# CHECK-NEXT: 9 7 0.67 * * btrw %si, (%rax) +# CHECK-NEXT: 9 7 0.67 * * btsw %si, (%rax) +# CHECK-NEXT: 1 1 0.50 btw $7, %di +# CHECK-NEXT: 2 2 1.00 btcw $7, %di +# CHECK-NEXT: 2 2 1.00 btrw $7, %di +# CHECK-NEXT: 2 2 1.00 btsw $7, %di +# CHECK-NEXT: 2 5 0.50 * btw $7, (%rax) +# CHECK-NEXT: 4 7 0.67 * * btcw $7, (%rax) +# CHECK-NEXT: 4 7 0.67 * * btrw $7, (%rax) +# CHECK-NEXT: 4 7 0.67 * * btsw $7, (%rax) +# CHECK-NEXT: 1 1 0.50 btl %esi, %edi +# CHECK-NEXT: 2 2 1.00 btcl %esi, %edi +# CHECK-NEXT: 2 2 1.00 btrl %esi, %edi +# CHECK-NEXT: 2 2 1.00 btsl %esi, %edi +# CHECK-NEXT: 7 5 0.50 * btl %esi, (%rax) +# 
CHECK-NEXT: 9 7 0.67 * * btcl %esi, (%rax) +# CHECK-NEXT: 9 7 0.67 * * btrl %esi, (%rax) +# CHECK-NEXT: 9 7 0.67 * * btsl %esi, (%rax) +# CHECK-NEXT: 1 1 0.50 btl $7, %edi +# CHECK-NEXT: 2 2 1.00 btcl $7, %edi +# CHECK-NEXT: 2 2 1.00 btrl $7, %edi +# CHECK-NEXT: 2 2 1.00 btsl $7, %edi +# CHECK-NEXT: 2 5 0.50 * btl $7, (%rax) +# CHECK-NEXT: 4 7 0.67 * * btcl $7, (%rax) +# CHECK-NEXT: 4 7 0.67 * * btrl $7, (%rax) +# CHECK-NEXT: 4 7 0.67 * * btsl $7, (%rax) +# CHECK-NEXT: 1 1 0.50 btq %rsi, %rdi +# CHECK-NEXT: 2 2 1.00 btcq %rsi, %rdi +# CHECK-NEXT: 2 2 1.00 btrq %rsi, %rdi +# CHECK-NEXT: 2 2 1.00 btsq %rsi, %rdi +# CHECK-NEXT: 7 5 0.50 * btq %rsi, (%rax) +# CHECK-NEXT: 9 7 0.67 * * btcq %rsi, (%rax) +# CHECK-NEXT: 9 7 0.67 * * btrq %rsi, (%rax) +# CHECK-NEXT: 9 7 0.67 * * btsq %rsi, (%rax) +# CHECK-NEXT: 1 1 0.50 btq $7, %rdi +# CHECK-NEXT: 2 2 1.00 btcq $7, %rdi +# CHECK-NEXT: 2 2 1.00 btrq $7, %rdi +# CHECK-NEXT: 2 2 1.00 btsq $7, %rdi +# CHECK-NEXT: 2 5 0.50 * btq $7, (%rax) +# CHECK-NEXT: 4 7 0.67 * * btcq $7, (%rax) +# CHECK-NEXT: 4 7 0.67 * * btrq $7, (%rax) +# CHECK-NEXT: 4 7 0.67 * * btsq $7, (%rax) +# CHECK-NEXT: 1 1 0.25 cbtw +# CHECK-NEXT: 1 1 0.25 cwtl +# CHECK-NEXT: 1 1 0.25 cltq +# CHECK-NEXT: 1 1 0.25 cwtd +# CHECK-NEXT: 1 1 0.25 cltd +# CHECK-NEXT: 1 1 0.25 cqto +# CHECK-NEXT: 1 1 0.25 U clc +# CHECK-NEXT: 1 1 0.25 U cld +# CHECK-NEXT: 1 1 0.25 U cmc +# CHECK-NEXT: 1 1 0.25 cmpb $7, %al +# CHECK-NEXT: 1 1 0.25 cmpb $7, %dil +# CHECK-NEXT: 1 5 0.33 * cmpb $7, (%rax) +# CHECK-NEXT: 1 1 0.25 cmpb %sil, %dil +# CHECK-NEXT: 1 5 0.33 * cmpb %sil, (%rax) +# CHECK-NEXT: 1 5 0.33 * cmpb (%rax), %dil +# CHECK-NEXT: 1 1 0.25 cmpw $511, %ax +# CHECK-NEXT: 1 1 0.25 cmpw $511, %di +# CHECK-NEXT: 1 5 0.33 * cmpw $511, (%rax) +# CHECK-NEXT: 1 1 0.25 cmpw $7, %di +# CHECK-NEXT: 1 5 0.33 * cmpw $7, (%rax) +# CHECK-NEXT: 1 1 0.25 cmpw %si, %di +# CHECK-NEXT: 1 5 0.33 * cmpw %si, (%rax) +# CHECK-NEXT: 1 5 0.33 * cmpw (%rax), %di +# CHECK-NEXT: 1 1 0.25 cmpl $665536, %eax 
+# CHECK-NEXT: 1 1 0.25 cmpl $665536, %edi +# CHECK-NEXT: 1 5 0.33 * cmpl $665536, (%rax) +# CHECK-NEXT: 1 1 0.25 cmpl $7, %edi +# CHECK-NEXT: 1 5 0.33 * cmpl $7, (%rax) +# CHECK-NEXT: 1 1 0.25 cmpl %esi, %edi +# CHECK-NEXT: 1 5 0.33 * cmpl %esi, (%rax) +# CHECK-NEXT: 1 5 0.33 * cmpl (%rax), %edi +# CHECK-NEXT: 1 1 0.25 cmpq $665536, %rax +# CHECK-NEXT: 1 1 0.25 cmpq $665536, %rdi +# CHECK-NEXT: 1 5 0.33 * cmpq $665536, (%rax) +# CHECK-NEXT: 1 1 0.25 cmpq $7, %rdi +# CHECK-NEXT: 1 5 0.33 * cmpq $7, (%rax) +# CHECK-NEXT: 1 1 0.25 cmpq %rsi, %rdi +# CHECK-NEXT: 1 5 0.33 * cmpq %rsi, (%rax) +# CHECK-NEXT: 1 5 0.33 * cmpq (%rax), %rdi +# CHECK-NEXT: 100 100 25.00 U cmpsb %es:(%rdi), (%rsi) +# CHECK-NEXT: 100 100 25.00 U cmpsw %es:(%rdi), (%rsi) +# CHECK-NEXT: 100 100 25.00 U cmpsl %es:(%rdi), (%rsi) +# CHECK-NEXT: 100 100 25.00 U cmpsq %es:(%rdi), (%rsi) +# CHECK-NEXT: 3 3 3.00 cmpxchgb %cl, %bl +# CHECK-NEXT: 5 7 3.00 * * cmpxchgb %cl, (%rbx) +# CHECK-NEXT: 5 3 3.00 cmpxchgw %cx, %bx +# CHECK-NEXT: 6 3 3.00 * * cmpxchgw %cx, (%rbx) +# CHECK-NEXT: 5 3 3.00 cmpxchgl %ecx, %ebx +# CHECK-NEXT: 6 3 3.00 * * cmpxchgl %ecx, (%rbx) +# CHECK-NEXT: 5 3 3.00 cmpxchgq %rcx, %rbx +# CHECK-NEXT: 6 3 3.00 * * cmpxchgq %rcx, (%rbx) +# CHECK-NEXT: 100 100 25.00 U cpuid +# CHECK-NEXT: 1 1 0.25 decb %dil +# CHECK-NEXT: 1 6 0.67 * * decb (%rax) +# CHECK-NEXT: 1 1 0.25 decw %di +# CHECK-NEXT: 1 6 0.67 * * decw (%rax) +# CHECK-NEXT: 1 1 0.25 decl %edi +# CHECK-NEXT: 1 6 0.67 * * decl (%rax) +# CHECK-NEXT: 1 1 0.25 decq %rdi +# CHECK-NEXT: 1 6 0.67 * * decq (%rax) +# CHECK-NEXT: 2 10 10.00 U divb %dil +# CHECK-NEXT: 2 14 10.00 * U divb (%rax) +# CHECK-NEXT: 2 11 11.00 U divw %si +# CHECK-NEXT: 2 15 11.00 * U divw (%rax) +# CHECK-NEXT: 2 13 13.00 U divl %edx +# CHECK-NEXT: 2 17 13.00 * U divl (%rax) +# CHECK-NEXT: 2 17 17.00 U divq %rcx +# CHECK-NEXT: 2 21 17.00 * U divq (%rax) +# CHECK-NEXT: 100 100 25.00 U enter $7, $4095 +# CHECK-NEXT: 2 10 10.00 U idivb %dil +# CHECK-NEXT: 2 14 10.00 * U 
idivb (%rax) +# CHECK-NEXT: 2 11 11.00 U idivw %si +# CHECK-NEXT: 2 15 11.00 * U idivw (%rax) +# CHECK-NEXT: 2 13 13.00 U idivl %edx +# CHECK-NEXT: 2 17 13.00 * U idivl (%rax) +# CHECK-NEXT: 2 17 17.00 U idivq %rcx +# CHECK-NEXT: 2 21 17.00 * U idivq (%rax) +# CHECK-NEXT: 1 3 3.00 imulb %dil +# CHECK-NEXT: 1 7 3.00 * imulb (%rax) +# CHECK-NEXT: 3 3 3.00 imulw %di +# CHECK-NEXT: 4 7 3.00 * imulw (%rax) +# CHECK-NEXT: 1 3 1.00 imulw %si, %di +# CHECK-NEXT: 1 7 1.00 * imulw (%rax), %di +# CHECK-NEXT: 2 4 4.00 imulw $511, %si, %di +# CHECK-NEXT: 2 8 4.00 * imulw $511, (%rax), %di +# CHECK-NEXT: 2 4 4.00 imulw $7, %si, %di +# CHECK-NEXT: 2 8 4.00 * imulw $7, (%rax), %di +# CHECK-NEXT: 2 3 3.00 imull %edi +# CHECK-NEXT: 2 7 3.00 * imull (%rax) +# CHECK-NEXT: 1 3 1.00 imull %esi, %edi +# CHECK-NEXT: 1 7 1.00 * imull (%rax), %edi +# CHECK-NEXT: 1 3 1.00 imull $665536, %esi, %edi +# CHECK-NEXT: 1 7 1.00 * imull $665536, (%rax), %edi +# CHECK-NEXT: 1 3 1.00 imull $7, %esi, %edi +# CHECK-NEXT: 1 7 1.00 * imull $7, (%rax), %edi +# CHECK-NEXT: 2 3 3.00 imulq %rdi +# CHECK-NEXT: 2 7 3.00 * imulq (%rax) +# CHECK-NEXT: 1 3 1.00 imulq %rsi, %rdi +# CHECK-NEXT: 1 7 1.00 * imulq (%rax), %rdi +# CHECK-NEXT: 1 3 1.00 imulq $665536, %rsi, %rdi +# CHECK-NEXT: 1 7 1.00 * imulq $665536, (%rax), %rdi +# CHECK-NEXT: 1 3 1.00 imulq $7, %rsi, %rdi +# CHECK-NEXT: 1 7 1.00 * imulq $7, (%rax), %rdi +# CHECK-NEXT: 100 100 25.00 U inb $7, %al +# CHECK-NEXT: 100 100 25.00 U inb %dx, %al +# CHECK-NEXT: 100 100 25.00 U inw $7, %ax +# CHECK-NEXT: 100 100 25.00 U inw %dx, %ax +# CHECK-NEXT: 100 100 25.00 U inl $7, %eax +# CHECK-NEXT: 100 100 25.00 U inl %dx, %eax +# CHECK-NEXT: 1 1 0.25 incb %dil +# CHECK-NEXT: 1 6 0.67 * * incb (%rax) +# CHECK-NEXT: 1 1 0.25 incw %di +# CHECK-NEXT: 1 6 0.67 * * incw (%rax) +# CHECK-NEXT: 1 1 0.25 incl %edi +# CHECK-NEXT: 1 6 0.67 * * incl (%rax) +# CHECK-NEXT: 1 1 0.25 incq %rdi +# CHECK-NEXT: 1 6 0.67 * * incq (%rax) +# CHECK-NEXT: 100 100 25.00 U insb %dx, %es:(%rdi) 
+# CHECK-NEXT: 100 100 25.00 U insw %dx, %es:(%rdi) +# CHECK-NEXT: 100 100 25.00 U insl %dx, %es:(%rdi) +# CHECK-NEXT: 100 100 25.00 * * U int $7 +# CHECK-NEXT: 100 100 25.00 U invlpg (%rax) +# CHECK-NEXT: 100 100 25.00 U invlpga +# CHECK-NEXT: 1 1 1.00 lahf +# CHECK-NEXT: 1 1 0.25 * leave +# CHECK-NEXT: 100 100 25.00 U lodsb (%rsi), %al +# CHECK-NEXT: 100 100 25.00 U lodsw (%rsi), %ax +# CHECK-NEXT: 100 100 25.00 U lodsl (%rsi), %eax +# CHECK-NEXT: 100 100 25.00 U lodsq (%rsi), %rax +# CHECK-NEXT: 100 100 25.00 U movsb (%rsi), %es:(%rdi) +# CHECK-NEXT: 100 100 25.00 U movsw (%rsi), %es:(%rdi) +# CHECK-NEXT: 100 100 25.00 U movsl (%rsi), %es:(%rdi) +# CHECK-NEXT: 100 100 25.00 U movsq (%rsi), %es:(%rdi) +# CHECK-NEXT: 1 1 1.00 movsbw %al, %di +# CHECK-NEXT: 1 1 1.00 movzbw %al, %di +# CHECK-NEXT: 1 5 1.00 * movsbw (%rax), %di +# CHECK-NEXT: 1 5 1.00 * movzbw (%rax), %di +# CHECK-NEXT: 1 1 0.25 movsbl %al, %edi +# CHECK-NEXT: 1 1 0.25 movzbl %al, %edi +# CHECK-NEXT: 1 5 0.33 * movsbl (%rax), %edi +# CHECK-NEXT: 1 5 0.33 * movzbl (%rax), %edi +# CHECK-NEXT: 1 1 0.25 movsbq %al, %rdi +# CHECK-NEXT: 1 1 0.25 movzbq %al, %rdi +# CHECK-NEXT: 1 5 0.33 * movsbq (%rax), %rdi +# CHECK-NEXT: 1 5 0.33 * movzbq (%rax), %rdi +# CHECK-NEXT: 1 1 0.25 movswl %ax, %edi +# CHECK-NEXT: 1 1 0.25 movzwl %ax, %edi +# CHECK-NEXT: 1 5 0.33 * movswl (%rax), %edi +# CHECK-NEXT: 1 5 0.33 * movzwl (%rax), %edi +# CHECK-NEXT: 1 1 0.25 movswq %ax, %rdi +# CHECK-NEXT: 1 1 0.25 movzwq %ax, %rdi +# CHECK-NEXT: 1 5 0.33 * movswq (%rax), %rdi +# CHECK-NEXT: 1 5 0.33 * movzwq (%rax), %rdi +# CHECK-NEXT: 1 1 0.25 movslq %eax, %rdi +# CHECK-NEXT: 1 5 0.33 * movslq (%rax), %rdi +# CHECK-NEXT: 1 3 3.00 mulb %dil +# CHECK-NEXT: 1 7 3.00 * mulb (%rax) +# CHECK-NEXT: 3 3 3.00 mulw %si +# CHECK-NEXT: 4 7 3.00 * mulw (%rax) +# CHECK-NEXT: 2 3 3.00 mull %edx +# CHECK-NEXT: 2 7 3.00 * mull (%rax) +# CHECK-NEXT: 2 3 3.00 mulq %rcx +# CHECK-NEXT: 2 7 3.00 * mulq (%rax) +# CHECK-NEXT: 1 1 0.25 negb %dil +# 
CHECK-NEXT: 1 6 0.67 * * negb (%r8) +# CHECK-NEXT: 1 1 0.25 negw %si +# CHECK-NEXT: 1 6 0.67 * * negw (%r9) +# CHECK-NEXT: 1 1 0.25 negl %edx +# CHECK-NEXT: 1 6 0.67 * * negl (%rax) +# CHECK-NEXT: 1 1 0.25 negq %rcx +# CHECK-NEXT: 1 6 0.67 * * negq (%r10) +# CHECK-NEXT: 1 0 0.25 nop +# CHECK-NEXT: 1 0 0.25 nopw %di +# CHECK-NEXT: 1 0 0.25 nopw (%rcx) +# CHECK-NEXT: 1 0 0.25 nopl %esi +# CHECK-NEXT: 1 0 0.25 nopl (%r8) +# CHECK-NEXT: 1 0 0.25 nopq %rdx +# CHECK-NEXT: 1 0 0.25 nopq (%r9) +# CHECK-NEXT: 1 1 0.25 notb %dil +# CHECK-NEXT: 1 6 0.67 * * notb (%r8) +# CHECK-NEXT: 1 1 0.25 notw %si +# CHECK-NEXT: 1 6 0.67 * * notw (%r9) +# CHECK-NEXT: 1 1 0.25 notl %edx +# CHECK-NEXT: 1 6 0.67 * * notl (%rax) +# CHECK-NEXT: 1 1 0.25 notq %rcx +# CHECK-NEXT: 1 6 0.67 * * notq (%r10) +# CHECK-NEXT: 1 1 1.00 orb $7, %al +# CHECK-NEXT: 1 1 0.25 orb $7, %dil +# CHECK-NEXT: 1 6 0.67 * * orb $7, (%rax) +# CHECK-NEXT: 1 1 0.25 orb %sil, %dil +# CHECK-NEXT: 1 6 0.67 * * orb %sil, (%rax) +# CHECK-NEXT: 1 5 0.33 * orb (%rax), %dil +# CHECK-NEXT: 1 1 1.00 orw $511, %ax +# CHECK-NEXT: 1 1 0.25 orw $511, %di +# CHECK-NEXT: 1 6 0.67 * * orw $511, (%rax) +# CHECK-NEXT: 1 1 0.25 orw $7, %di +# CHECK-NEXT: 1 6 0.67 * * orw $7, (%rax) +# CHECK-NEXT: 1 1 0.25 orw %si, %di +# CHECK-NEXT: 1 6 0.67 * * orw %si, (%rax) +# CHECK-NEXT: 1 5 0.33 * orw (%rax), %di +# CHECK-NEXT: 1 1 1.00 orl $665536, %eax +# CHECK-NEXT: 1 1 0.25 orl $665536, %edi +# CHECK-NEXT: 1 6 0.67 * * orl $665536, (%rax) +# CHECK-NEXT: 1 1 0.25 orl $7, %edi +# CHECK-NEXT: 1 6 0.67 * * orl $7, (%rax) +# CHECK-NEXT: 1 1 0.25 orl %esi, %edi +# CHECK-NEXT: 1 6 0.67 * * orl %esi, (%rax) +# CHECK-NEXT: 1 5 0.33 * orl (%rax), %edi +# CHECK-NEXT: 1 1 1.00 orq $665536, %rax +# CHECK-NEXT: 1 1 0.25 orq $665536, %rdi +# CHECK-NEXT: 1 6 0.67 * * orq $665536, (%rax) +# CHECK-NEXT: 1 1 0.25 orq $7, %rdi +# CHECK-NEXT: 1 6 0.67 * * orq $7, (%rax) +# CHECK-NEXT: 1 1 0.25 orq %rsi, %rdi +# CHECK-NEXT: 1 6 0.67 * * orq %rsi, (%rax) +# CHECK-NEXT: 
1 5 0.33 * orq (%rax), %rdi +# CHECK-NEXT: 100 100 25.00 U outb %al, $7 +# CHECK-NEXT: 100 100 25.00 U outb %al, %dx +# CHECK-NEXT: 100 100 25.00 U outw %ax, $7 +# CHECK-NEXT: 100 100 25.00 U outw %ax, %dx +# CHECK-NEXT: 100 100 25.00 U outl %eax, $7 +# CHECK-NEXT: 100 100 25.00 U outl %eax, %dx +# CHECK-NEXT: 100 100 25.00 U outsb (%rsi), %dx +# CHECK-NEXT: 100 100 25.00 U outsw (%rsi), %dx +# CHECK-NEXT: 100 100 25.00 U outsl (%rsi), %dx +# CHECK-NEXT: 1 0 0.25 * * U pause +# CHECK-NEXT: 1 1 1.00 rclb %dil +# CHECK-NEXT: 1 1 1.00 rcrb %dil +# CHECK-NEXT: 2 5 1.00 * rclb (%rax) +# CHECK-NEXT: 2 5 1.00 * rcrb (%rax) +# CHECK-NEXT: 9 4 4.00 rclb $7, %dil +# CHECK-NEXT: 7 3 3.00 rcrb $7, %dil +# CHECK-NEXT: 11 8 4.00 * rclb $7, (%rax) +# CHECK-NEXT: 10 7 4.00 * rcrb $7, (%rax) +# CHECK-NEXT: 9 4 4.00 rclb %cl, %dil +# CHECK-NEXT: 7 3 3.00 rcrb %cl, %dil +# CHECK-NEXT: 11 8 4.00 * rclb %cl, (%rax) +# CHECK-NEXT: 9 7 4.00 * rcrb %cl, (%rax) +# CHECK-NEXT: 1 1 1.00 rclw %di +# CHECK-NEXT: 1 1 1.00 rcrw %di +# CHECK-NEXT: 2 5 1.00 * rclw (%rax) +# CHECK-NEXT: 2 5 1.00 * rcrw (%rax) +# CHECK-NEXT: 9 4 4.00 rclw $7, %di +# CHECK-NEXT: 7 3 3.00 rcrw $7, %di +# CHECK-NEXT: 11 8 4.00 * rclw $7, (%rax) +# CHECK-NEXT: 10 7 4.00 * rcrw $7, (%rax) +# CHECK-NEXT: 9 4 4.00 rclw %cl, %di +# CHECK-NEXT: 7 3 3.00 rcrw %cl, %di +# CHECK-NEXT: 11 8 4.00 * rclw %cl, (%rax) +# CHECK-NEXT: 9 7 4.00 * rcrw %cl, (%rax) +# CHECK-NEXT: 1 1 1.00 rcll %edi +# CHECK-NEXT: 1 1 1.00 rcrl %edi +# CHECK-NEXT: 2 5 1.00 * rcll (%rax) +# CHECK-NEXT: 2 5 1.00 * rcrl (%rax) +# CHECK-NEXT: 9 4 4.00 rcll $7, %edi +# CHECK-NEXT: 7 3 3.00 rcrl $7, %edi +# CHECK-NEXT: 11 8 4.00 * rcll $7, (%rax) +# CHECK-NEXT: 10 7 4.00 * rcrl $7, (%rax) +# CHECK-NEXT: 9 4 4.00 rcll %cl, %edi +# CHECK-NEXT: 7 3 3.00 rcrl %cl, %edi +# CHECK-NEXT: 11 8 4.00 * rcll %cl, (%rax) +# CHECK-NEXT: 9 7 4.00 * rcrl %cl, (%rax) +# CHECK-NEXT: 1 1 1.00 rclq %rdi +# CHECK-NEXT: 1 1 1.00 rcrq %rdi +# CHECK-NEXT: 2 5 1.00 * rclq (%rax) +# 
CHECK-NEXT: 2 5 1.00 * rcrq (%rax) +# CHECK-NEXT: 9 4 4.00 rclq $7, %rdi +# CHECK-NEXT: 7 3 3.00 rcrq $7, %rdi +# CHECK-NEXT: 11 8 4.00 * rclq $7, (%rax) +# CHECK-NEXT: 10 7 4.00 * rcrq $7, (%rax) +# CHECK-NEXT: 9 4 4.00 rclq %cl, %rdi +# CHECK-NEXT: 7 3 3.00 rcrq %cl, %rdi +# CHECK-NEXT: 11 8 4.00 * rclq %cl, (%rax) +# CHECK-NEXT: 9 7 4.00 * rcrq %cl, (%rax) +# CHECK-NEXT: 100 100 25.00 U rdmsr +# CHECK-NEXT: 100 100 25.00 U rdpmc +# CHECK-NEXT: 100 100 25.00 U rdtsc +# CHECK-NEXT: 100 100 25.00 U rdtscp +# CHECK-NEXT: 1 1 0.50 rolb %dil +# CHECK-NEXT: 1 1 0.50 rorb %dil +# CHECK-NEXT: 2 5 0.67 * * rolb (%rax) +# CHECK-NEXT: 2 5 0.67 * * rorb (%rax) +# CHECK-NEXT: 1 1 0.50 rolb $7, %dil +# CHECK-NEXT: 1 1 0.50 rorb $7, %dil +# CHECK-NEXT: 2 5 0.67 * * rolb $7, (%rax) +# CHECK-NEXT: 2 5 0.67 * * rorb $7, (%rax) +# CHECK-NEXT: 1 1 0.50 rolb %cl, %dil +# CHECK-NEXT: 1 1 0.50 rorb %cl, %dil +# CHECK-NEXT: 2 5 0.67 * * rolb %cl, (%rax) +# CHECK-NEXT: 2 5 0.67 * * rorb %cl, (%rax) +# CHECK-NEXT: 1 1 0.50 rolw %di +# CHECK-NEXT: 1 1 0.50 rorw %di +# CHECK-NEXT: 2 5 0.67 * * rolw (%rax) +# CHECK-NEXT: 2 5 0.67 * * rorw (%rax) +# CHECK-NEXT: 1 1 0.50 rolw $7, %di +# CHECK-NEXT: 1 1 0.50 rorw $7, %di +# CHECK-NEXT: 2 5 0.67 * * rolw $7, (%rax) +# CHECK-NEXT: 2 5 0.67 * * rorw $7, (%rax) +# CHECK-NEXT: 1 1 0.50 rolw %cl, %di +# CHECK-NEXT: 1 1 0.50 rorw %cl, %di +# CHECK-NEXT: 2 5 0.67 * * rolw %cl, (%rax) +# CHECK-NEXT: 2 5 0.67 * * rorw %cl, (%rax) +# CHECK-NEXT: 1 1 0.50 roll %edi +# CHECK-NEXT: 1 1 0.50 rorl %edi +# CHECK-NEXT: 2 5 0.67 * * roll (%rax) +# CHECK-NEXT: 2 5 0.67 * * rorl (%rax) +# CHECK-NEXT: 1 1 0.50 roll $7, %edi +# CHECK-NEXT: 1 1 0.50 rorl $7, %edi +# CHECK-NEXT: 2 5 0.67 * * roll $7, (%rax) +# CHECK-NEXT: 2 5 0.67 * * rorl $7, (%rax) +# CHECK-NEXT: 1 1 0.50 roll %cl, %edi +# CHECK-NEXT: 1 1 0.50 rorl %cl, %edi +# CHECK-NEXT: 2 5 0.67 * * roll %cl, (%rax) +# CHECK-NEXT: 2 5 0.67 * * rorl %cl, (%rax) +# CHECK-NEXT: 1 1 0.50 rolq %rdi +# CHECK-NEXT: 1 1 
0.50 rorq %rdi +# CHECK-NEXT: 2 5 0.67 * * rolq (%rax) +# CHECK-NEXT: 2 5 0.67 * * rorq (%rax) +# CHECK-NEXT: 1 1 0.50 rolq $7, %rdi +# CHECK-NEXT: 1 1 0.50 rorq $7, %rdi +# CHECK-NEXT: 2 5 0.67 * * rolq $7, (%rax) +# CHECK-NEXT: 2 5 0.67 * * rorq $7, (%rax) +# CHECK-NEXT: 1 1 0.50 rolq %cl, %rdi +# CHECK-NEXT: 1 1 0.50 rorq %cl, %rdi +# CHECK-NEXT: 2 5 0.67 * * rolq %cl, (%rax) +# CHECK-NEXT: 2 5 0.67 * * rorq %cl, (%rax) +# CHECK-NEXT: 1 1 1.00 sahf +# CHECK-NEXT: 1 1 0.50 sarb %dil +# CHECK-NEXT: 1 1 0.50 shlb %dil +# CHECK-NEXT: 1 1 0.50 shrb %dil +# CHECK-NEXT: 2 5 0.67 * * sarb (%rax) +# CHECK-NEXT: 2 5 0.67 * * shlb (%rax) +# CHECK-NEXT: 2 5 0.67 * * shrb (%rax) +# CHECK-NEXT: 1 1 0.50 sarb $7, %dil +# CHECK-NEXT: 1 1 0.50 shlb $7, %dil +# CHECK-NEXT: 1 1 0.50 shrb $7, %dil +# CHECK-NEXT: 2 5 0.67 * * sarb $7, (%rax) +# CHECK-NEXT: 2 5 0.67 * * shlb $7, (%rax) +# CHECK-NEXT: 2 5 0.67 * * shrb $7, (%rax) +# CHECK-NEXT: 1 1 0.50 sarb %cl, %dil +# CHECK-NEXT: 1 1 0.50 shlb %cl, %dil +# CHECK-NEXT: 1 1 0.50 shrb %cl, %dil +# CHECK-NEXT: 2 5 0.67 * * sarb %cl, (%rax) +# CHECK-NEXT: 2 5 0.67 * * shlb %cl, (%rax) +# CHECK-NEXT: 2 5 0.67 * * shrb %cl, (%rax) +# CHECK-NEXT: 1 1 0.50 sarw %di +# CHECK-NEXT: 1 1 0.50 shlw %di +# CHECK-NEXT: 1 1 0.50 shrw %di +# CHECK-NEXT: 2 5 0.67 * * sarw (%rax) +# CHECK-NEXT: 2 5 0.67 * * shlw (%rax) +# CHECK-NEXT: 2 5 0.67 * * shrw (%rax) +# CHECK-NEXT: 1 1 0.50 sarw $7, %di +# CHECK-NEXT: 1 1 0.50 shlw $7, %di +# CHECK-NEXT: 1 1 0.50 shrw $7, %di +# CHECK-NEXT: 2 5 0.67 * * sarw $7, (%rax) +# CHECK-NEXT: 2 5 0.67 * * shlw $7, (%rax) +# CHECK-NEXT: 2 5 0.67 * * shrw $7, (%rax) +# CHECK-NEXT: 1 1 0.50 sarw %cl, %di +# CHECK-NEXT: 1 1 0.50 shlw %cl, %di +# CHECK-NEXT: 1 1 0.50 shrw %cl, %di +# CHECK-NEXT: 2 5 0.67 * * sarw %cl, (%rax) +# CHECK-NEXT: 2 5 0.67 * * shlw %cl, (%rax) +# CHECK-NEXT: 2 5 0.67 * * shrw %cl, (%rax) +# CHECK-NEXT: 1 1 0.50 sarl %edi +# CHECK-NEXT: 1 1 0.50 shll %edi +# CHECK-NEXT: 1 1 0.50 shrl %edi +# 
CHECK-NEXT: 2 5 0.67 * * sarl (%rax) +# CHECK-NEXT: 2 5 0.67 * * shll (%rax) +# CHECK-NEXT: 2 5 0.67 * * shrl (%rax) +# CHECK-NEXT: 1 1 0.50 sarl $7, %edi +# CHECK-NEXT: 1 1 0.50 shll $7, %edi +# CHECK-NEXT: 1 1 0.50 shrl $7, %edi +# CHECK-NEXT: 2 5 0.67 * * sarl $7, (%rax) +# CHECK-NEXT: 2 5 0.67 * * shll $7, (%rax) +# CHECK-NEXT: 2 5 0.67 * * shrl $7, (%rax) +# CHECK-NEXT: 1 1 0.50 sarl %cl, %edi +# CHECK-NEXT: 1 1 0.50 shll %cl, %edi +# CHECK-NEXT: 1 1 0.50 shrl %cl, %edi +# CHECK-NEXT: 2 5 0.67 * * sarl %cl, (%rax) +# CHECK-NEXT: 2 5 0.67 * * shll %cl, (%rax) +# CHECK-NEXT: 2 5 0.67 * * shrl %cl, (%rax) +# CHECK-NEXT: 1 1 0.50 sarq %rdi +# CHECK-NEXT: 1 1 0.50 shlq %rdi +# CHECK-NEXT: 1 1 0.50 shrq %rdi +# CHECK-NEXT: 2 5 0.67 * * sarq (%rax) +# CHECK-NEXT: 2 5 0.67 * * shlq (%rax) +# CHECK-NEXT: 2 5 0.67 * * shrq (%rax) +# CHECK-NEXT: 1 1 0.50 sarq $7, %rdi +# CHECK-NEXT: 1 1 0.50 shlq $7, %rdi +# CHECK-NEXT: 1 1 0.50 shrq $7, %rdi +# CHECK-NEXT: 2 5 0.67 * * sarq $7, (%rax) +# CHECK-NEXT: 2 5 0.67 * * shlq $7, (%rax) +# CHECK-NEXT: 2 5 0.67 * * shrq $7, (%rax) +# CHECK-NEXT: 1 1 0.50 sarq %cl, %rdi +# CHECK-NEXT: 1 1 0.50 shlq %cl, %rdi +# CHECK-NEXT: 1 1 0.50 shrq %cl, %rdi +# CHECK-NEXT: 2 5 0.67 * * sarq %cl, (%rax) +# CHECK-NEXT: 2 5 0.67 * * shlq %cl, (%rax) +# CHECK-NEXT: 2 5 0.67 * * shrq %cl, (%rax) +# CHECK-NEXT: 1 1 1.00 sbbb $0, %al +# CHECK-NEXT: 1 1 1.00 sbbb $0, %dil +# CHECK-NEXT: 1 6 1.00 * * sbbb $0, (%rax) +# CHECK-NEXT: 1 1 1.00 sbbb $7, %al +# CHECK-NEXT: 1 1 1.00 sbbb $7, %dil +# CHECK-NEXT: 1 6 1.00 * * sbbb $7, (%rax) +# CHECK-NEXT: 1 1 1.00 sbbb %sil, %dil +# CHECK-NEXT: 1 1 1.75 * * sbbb %sil, (%rax) +# CHECK-NEXT: 1 5 1.00 * sbbb (%rax), %dil +# CHECK-NEXT: 1 1 1.00 sbbw $0, %ax +# CHECK-NEXT: 1 1 1.00 sbbw $0, %di +# CHECK-NEXT: 1 6 1.00 * * sbbw $0, (%rax) +# CHECK-NEXT: 1 1 1.00 sbbw $511, %ax +# CHECK-NEXT: 1 1 1.00 sbbw $511, %di +# CHECK-NEXT: 1 6 1.00 * * sbbw $511, (%rax) +# CHECK-NEXT: 1 1 1.00 sbbw $7, %di +# CHECK-NEXT: 1 
6 1.00 * * sbbw $7, (%rax) +# CHECK-NEXT: 1 1 1.00 sbbw %si, %di +# CHECK-NEXT: 1 6 1.00 * * sbbw %si, (%rax) +# CHECK-NEXT: 1 5 1.00 * sbbw (%rax), %di +# CHECK-NEXT: 1 1 1.00 sbbl $0, %eax +# CHECK-NEXT: 1 1 1.00 sbbl $0, %edi +# CHECK-NEXT: 1 6 1.00 * * sbbl $0, (%rax) +# CHECK-NEXT: 1 1 1.00 sbbl $665536, %eax +# CHECK-NEXT: 1 1 1.00 sbbl $665536, %edi +# CHECK-NEXT: 1 6 1.00 * * sbbl $665536, (%rax) +# CHECK-NEXT: 1 1 1.00 sbbl $7, %edi +# CHECK-NEXT: 1 6 1.00 * * sbbl $7, (%rax) +# CHECK-NEXT: 1 1 1.00 sbbl %esi, %edi +# CHECK-NEXT: 1 6 1.00 * * sbbl %esi, (%rax) +# CHECK-NEXT: 1 5 1.00 * sbbl (%rax), %edi +# CHECK-NEXT: 1 1 1.00 sbbq $0, %rax +# CHECK-NEXT: 1 1 1.00 sbbq $0, %rdi +# CHECK-NEXT: 1 6 1.00 * * sbbq $0, (%rax) +# CHECK-NEXT: 1 1 1.00 sbbq $665536, %rax +# CHECK-NEXT: 1 1 1.00 sbbq $665536, %rdi +# CHECK-NEXT: 1 6 1.00 * * sbbq $665536, (%rax) +# CHECK-NEXT: 1 1 1.00 sbbq $7, %rdi +# CHECK-NEXT: 1 6 1.00 * * sbbq $7, (%rax) +# CHECK-NEXT: 1 1 1.00 sbbq %rsi, %rdi +# CHECK-NEXT: 1 6 1.00 * * sbbq %rsi, (%rax) +# CHECK-NEXT: 1 5 1.00 * sbbq (%rax), %rdi +# CHECK-NEXT: 100 100 25.00 U scasb %es:(%rdi), %al +# CHECK-NEXT: 100 100 25.00 U scasw %es:(%rdi), %ax +# CHECK-NEXT: 100 100 25.00 U scasl %es:(%rdi), %eax +# CHECK-NEXT: 100 100 25.00 U scasq %es:(%rdi), %rax +# CHECK-NEXT: 1 1 1.00 seto %al +# CHECK-NEXT: 2 2 1.00 * seto (%rax) +# CHECK-NEXT: 1 1 1.00 setno %al +# CHECK-NEXT: 2 2 1.00 * setno (%rax) +# CHECK-NEXT: 1 1 1.00 setb %al +# CHECK-NEXT: 2 2 1.00 * setb (%rax) +# CHECK-NEXT: 1 1 1.00 setae %al +# CHECK-NEXT: 2 2 1.00 * setae (%rax) +# CHECK-NEXT: 1 1 1.00 sete %al +# CHECK-NEXT: 2 2 1.00 * sete (%rax) +# CHECK-NEXT: 1 1 1.00 setne %al +# CHECK-NEXT: 2 2 1.00 * setne (%rax) +# CHECK-NEXT: 1 1 1.00 seta %al +# CHECK-NEXT: 2 2 1.00 * seta (%rax) +# CHECK-NEXT: 1 1 1.00 setbe %al +# CHECK-NEXT: 2 2 1.00 * setbe (%rax) +# CHECK-NEXT: 1 1 1.00 sets %al +# CHECK-NEXT: 2 2 1.00 * sets (%rax) +# CHECK-NEXT: 1 1 1.00 setns %al +# CHECK-NEXT: 2 
2 1.00 * setns (%rax) +# CHECK-NEXT: 1 1 1.00 setp %al +# CHECK-NEXT: 2 2 1.00 * setp (%rax) +# CHECK-NEXT: 1 1 1.00 setnp %al +# CHECK-NEXT: 2 2 1.00 * setnp (%rax) +# CHECK-NEXT: 1 1 1.00 setl %al +# CHECK-NEXT: 2 2 1.00 * setl (%rax) +# CHECK-NEXT: 1 1 1.00 setge %al +# CHECK-NEXT: 2 2 1.00 * setge (%rax) +# CHECK-NEXT: 1 1 1.00 setg %al +# CHECK-NEXT: 2 2 1.00 * setg (%rax) +# CHECK-NEXT: 1 1 1.00 setle %al +# CHECK-NEXT: 2 2 1.00 * setle (%rax) +# CHECK-NEXT: 5 2 1.50 shldw %cl, %si, %di +# CHECK-NEXT: 5 2 1.50 shrdw %cl, %si, %di +# CHECK-NEXT: 6 6 2.00 * * shldw %cl, %si, (%rax) +# CHECK-NEXT: 6 6 2.00 * * shrdw %cl, %si, (%rax) +# CHECK-NEXT: 4 2 1.50 shldw $7, %si, %di +# CHECK-NEXT: 4 2 1.50 shrdw $7, %si, %di +# CHECK-NEXT: 6 6 2.00 * * shldw $7, %si, (%rax) +# CHECK-NEXT: 6 6 2.00 * * shrdw $7, %si, (%rax) +# CHECK-NEXT: 5 2 1.50 shldl %cl, %esi, %edi +# CHECK-NEXT: 5 2 1.50 shrdl %cl, %esi, %edi +# CHECK-NEXT: 6 6 2.00 * * shldl %cl, %esi, (%rax) +# CHECK-NEXT: 6 6 2.00 * * shrdl %cl, %esi, (%rax) +# CHECK-NEXT: 4 2 1.50 shldl $7, %esi, %edi +# CHECK-NEXT: 4 2 1.50 shrdl $7, %esi, %edi +# CHECK-NEXT: 6 6 2.00 * * shldl $7, %esi, (%rax) +# CHECK-NEXT: 6 6 2.00 * * shrdl $7, %esi, (%rax) +# CHECK-NEXT: 5 2 1.50 shldq %cl, %rsi, %rdi +# CHECK-NEXT: 5 2 1.50 shrdq %cl, %rsi, %rdi +# CHECK-NEXT: 6 6 2.00 * * shldq %cl, %rsi, (%rax) +# CHECK-NEXT: 6 6 2.00 * * shrdq %cl, %rsi, (%rax) +# CHECK-NEXT: 4 2 1.50 shldq $7, %rsi, %rdi +# CHECK-NEXT: 4 2 1.50 shrdq $7, %rsi, %rdi +# CHECK-NEXT: 6 6 2.00 * * shldq $7, %rsi, (%rax) +# CHECK-NEXT: 6 6 2.00 * * shrdq $7, %rsi, (%rax) +# CHECK-NEXT: 1 1 0.25 U stc +# CHECK-NEXT: 1 1 0.25 U std +# CHECK-NEXT: 100 100 25.00 U stosb %al, %es:(%rdi) +# CHECK-NEXT: 100 100 25.00 U stosw %ax, %es:(%rdi) +# CHECK-NEXT: 100 100 25.00 U stosl %eax, %es:(%rdi) +# CHECK-NEXT: 100 100 25.00 U stosq %rax, %es:(%rdi) +# CHECK-NEXT: 1 1 1.00 subb $7, %al +# CHECK-NEXT: 1 1 0.25 subb $7, %dil +# CHECK-NEXT: 1 6 0.67 * * subb $7, (%rax) 
+# CHECK-NEXT: 1 1 0.25 subb %sil, %dil +# CHECK-NEXT: 1 6 0.67 * * subb %sil, (%rax) +# CHECK-NEXT: 1 5 0.33 * subb (%rax), %dil +# CHECK-NEXT: 1 1 1.00 subw $511, %ax +# CHECK-NEXT: 1 1 0.25 subw $511, %di +# CHECK-NEXT: 1 6 0.67 * * subw $511, (%rax) +# CHECK-NEXT: 1 1 0.25 subw $7, %di +# CHECK-NEXT: 1 6 0.67 * * subw $7, (%rax) +# CHECK-NEXT: 1 1 0.25 subw %si, %di +# CHECK-NEXT: 1 6 0.67 * * subw %si, (%rax) +# CHECK-NEXT: 1 5 0.33 * subw (%rax), %di +# CHECK-NEXT: 1 1 1.00 subl $665536, %eax +# CHECK-NEXT: 1 1 0.25 subl $665536, %edi +# CHECK-NEXT: 1 6 0.67 * * subl $665536, (%rax) +# CHECK-NEXT: 1 1 0.25 subl $7, %edi +# CHECK-NEXT: 1 6 0.67 * * subl $7, (%rax) +# CHECK-NEXT: 1 1 0.25 subl %esi, %edi +# CHECK-NEXT: 1 6 0.67 * * subl %esi, (%rax) +# CHECK-NEXT: 1 5 0.33 * subl (%rax), %edi +# CHECK-NEXT: 1 1 1.00 subq $665536, %rax +# CHECK-NEXT: 1 1 0.25 subq $665536, %rdi +# CHECK-NEXT: 1 6 0.67 * * subq $665536, (%rax) +# CHECK-NEXT: 1 1 0.25 subq $7, %rdi +# CHECK-NEXT: 1 6 0.67 * * subq $7, (%rax) +# CHECK-NEXT: 1 1 0.25 subq %rsi, %rdi +# CHECK-NEXT: 1 6 0.67 * * subq %rsi, (%rax) +# CHECK-NEXT: 1 5 0.33 * subq (%rax), %rdi +# CHECK-NEXT: 1 1 0.25 testb $7, %al +# CHECK-NEXT: 1 1 0.25 testb $7, %dil +# CHECK-NEXT: 1 5 0.33 * testb $7, (%rax) +# CHECK-NEXT: 1 1 0.25 testb %sil, %dil +# CHECK-NEXT: 1 5 0.33 * testb %sil, (%rax) +# CHECK-NEXT: 1 1 0.25 testw $511, %ax +# CHECK-NEXT: 1 1 0.25 testw $511, %di +# CHECK-NEXT: 1 5 0.33 * testw $511, (%rax) +# CHECK-NEXT: 1 1 0.25 testw $7, %di +# CHECK-NEXT: 1 5 0.33 * testw $7, (%rax) +# CHECK-NEXT: 1 1 0.25 testw %si, %di +# CHECK-NEXT: 1 5 0.33 * testw %si, (%rax) +# CHECK-NEXT: 1 1 0.25 testl $665536, %eax +# CHECK-NEXT: 1 1 0.25 testl $665536, %edi +# CHECK-NEXT: 1 5 0.33 * testl $665536, (%rax) +# CHECK-NEXT: 1 1 0.25 testl $7, %edi +# CHECK-NEXT: 1 5 0.33 * testl $7, (%rax) +# CHECK-NEXT: 1 1 0.25 testl %esi, %edi +# CHECK-NEXT: 1 5 0.33 * testl %esi, (%rax) +# CHECK-NEXT: 1 1 0.25 testq $665536, %rax 
+# CHECK-NEXT: 1 1 0.25 testq $665536, %rdi +# CHECK-NEXT: 1 5 0.33 * testq $665536, (%rax) +# CHECK-NEXT: 1 1 0.25 testq $7, %rdi +# CHECK-NEXT: 1 5 0.33 * testq $7, (%rax) +# CHECK-NEXT: 1 1 0.25 testq %rsi, %rdi +# CHECK-NEXT: 1 5 0.33 * testq %rsi, (%rax) +# CHECK-NEXT: 100 100 25.00 * U ud2 +# CHECK-NEXT: 100 100 25.00 U wrmsr +# CHECK-NEXT: 2 0 2.00 xaddb %bl, %cl +# CHECK-NEXT: 1 5 0.67 * * xaddb %bl, (%rcx) +# CHECK-NEXT: 2 0 2.00 xaddw %bx, %cx +# CHECK-NEXT: 1 5 0.67 * * xaddw %ax, (%rbx) +# CHECK-NEXT: 2 0 2.00 xaddl %ebx, %ecx +# CHECK-NEXT: 1 5 0.67 * * xaddl %eax, (%rbx) +# CHECK-NEXT: 2 0 2.00 xaddq %rbx, %rcx +# CHECK-NEXT: 1 5 0.67 * * xaddq %rax, (%rbx) +# CHECK-NEXT: 2 1 0.50 xchgb %bl, %cl +# CHECK-NEXT: 5 7 0.50 * * xchgb %bl, (%rbx) +# CHECK-NEXT: 2 1 0.50 xchgw %bx, %ax +# CHECK-NEXT: 2 1 0.50 xchgw %bx, %cx +# CHECK-NEXT: 5 7 0.50 * * xchgw %ax, (%rbx) +# CHECK-NEXT: 2 0 2.00 xchgl %ebx, %eax +# CHECK-NEXT: 2 0 2.00 xchgl %ebx, %ecx +# CHECK-NEXT: 2 6 0.50 * * xchgl %eax, (%rbx) +# CHECK-NEXT: 2 0 2.00 xchgq %rbx, %rax +# CHECK-NEXT: 2 0 2.00 xchgq %rbx, %rcx +# CHECK-NEXT: 2 6 0.50 * * xchgq %rax, (%rbx) +# CHECK-NEXT: 1 5 0.33 * xlatb +# CHECK-NEXT: 1 1 1.00 xorb $7, %al +# CHECK-NEXT: 1 1 0.25 xorb $7, %dil +# CHECK-NEXT: 1 6 0.67 * * xorb $7, (%rax) +# CHECK-NEXT: 1 1 0.25 xorb %sil, %dil +# CHECK-NEXT: 1 6 0.67 * * xorb %sil, (%rax) +# CHECK-NEXT: 1 5 0.33 * xorb (%rax), %dil +# CHECK-NEXT: 1 1 1.00 xorw $511, %ax +# CHECK-NEXT: 1 1 0.25 xorw $511, %di +# CHECK-NEXT: 1 6 0.67 * * xorw $511, (%rax) +# CHECK-NEXT: 1 1 0.25 xorw $7, %di +# CHECK-NEXT: 1 6 0.67 * * xorw $7, (%rax) +# CHECK-NEXT: 1 1 0.25 xorw %si, %di +# CHECK-NEXT: 1 6 0.67 * * xorw %si, (%rax) +# CHECK-NEXT: 1 5 0.33 * xorw (%rax), %di +# CHECK-NEXT: 1 1 1.00 xorl $665536, %eax +# CHECK-NEXT: 1 1 0.25 xorl $665536, %edi +# CHECK-NEXT: 1 6 0.67 * * xorl $665536, (%rax) +# CHECK-NEXT: 1 1 0.25 xorl $7, %edi +# CHECK-NEXT: 1 6 0.67 * * xorl $7, (%rax) +# CHECK-NEXT: 1 1 0.25 
xorl %esi, %edi +# CHECK-NEXT: 1 6 0.67 * * xorl %esi, (%rax) +# CHECK-NEXT: 1 5 0.33 * xorl (%rax), %edi +# CHECK-NEXT: 1 1 1.00 xorq $665536, %rax +# CHECK-NEXT: 1 1 0.25 xorq $665536, %rdi +# CHECK-NEXT: 1 6 0.67 * * xorq $665536, (%rax) +# CHECK-NEXT: 1 1 0.25 xorq $7, %rdi +# CHECK-NEXT: 1 6 0.67 * * xorq $7, (%rax) +# CHECK-NEXT: 1 1 0.25 xorq %rsi, %rdi +# CHECK-NEXT: 1 6 0.67 * * xorq %rsi, (%rax) +# CHECK-NEXT: 1 5 0.33 * xorq (%rax), %rdi + +# CHECK: Resources: +# CHECK-NEXT: [0] - Zn3AGU0 +# CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: [8] - Zn3FPP0 +# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: [14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# CHECK-NEXT: [15.0] - Zn3Load +# CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: [16.0] - Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: 176.00 176.00 176.00 1677.50 1832.50 1712.50 1475.50 - - - - - - - - 175.33 175.33 175.33 109.00 109.00 109.00 99.50 99.50 + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - adcb $0, %al +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - adcb $0, %dil +# CHECK-NEXT: 0.67 0.67 0.67 1.00 1.00 1.00 1.00 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 adcb $0, (%rax) +# 
CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - adcb $7, %al +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - adcb $7, %dil +# CHECK-NEXT: 0.67 0.67 0.67 1.00 1.00 1.00 1.00 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 adcb $7, (%rax) +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - adcb %sil, %dil +# CHECK-NEXT: 0.33 0.33 0.33 1.75 1.75 1.75 1.75 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 adcb %sil, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 1.00 1.00 1.00 1.00 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - adcb (%rax), %dil +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - adcw $0, %ax +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - adcw $0, %di +# CHECK-NEXT: 0.67 0.67 0.67 1.00 1.00 1.00 1.00 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 adcw $0, (%rax) +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - adcw $511, %ax +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - adcw $511, %di +# CHECK-NEXT: 0.67 0.67 0.67 1.00 1.00 1.00 1.00 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 adcw $511, (%rax) +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - adcw $7, %di +# CHECK-NEXT: 0.67 0.67 0.67 1.00 1.00 1.00 1.00 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 adcw $7, (%rax) +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - adcw %si, %di +# CHECK-NEXT: 0.67 0.67 0.67 1.00 1.00 1.00 1.00 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 adcw %si, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 1.00 1.00 1.00 1.00 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - adcw (%rax), %di +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - adcl $0, %eax +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - adcl $0, %edi +# CHECK-NEXT: 0.67 0.67 0.67 1.00 1.00 1.00 1.00 - - - - - - - - 0.67 
0.67 0.67 0.33 0.33 0.33 0.50 0.50 adcl $0, (%rax) +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - adcl $665536, %eax +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - adcl $665536, %edi +# CHECK-NEXT: 0.67 0.67 0.67 1.00 1.00 1.00 1.00 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 adcl $665536, (%rax) +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - adcl $7, %edi +# CHECK-NEXT: 0.67 0.67 0.67 1.00 1.00 1.00 1.00 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 adcl $7, (%rax) +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - adcl %esi, %edi +# CHECK-NEXT: 0.67 0.67 0.67 1.00 1.00 1.00 1.00 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 adcl %esi, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 1.00 1.00 1.00 1.00 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - adcl (%rax), %edi +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - adcq $0, %rax +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - adcq $0, %rdi +# CHECK-NEXT: 0.67 0.67 0.67 1.00 1.00 1.00 1.00 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 adcq $0, (%rax) +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - adcq $665536, %rax +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - adcq $665536, %rdi +# CHECK-NEXT: 0.67 0.67 0.67 1.00 1.00 1.00 1.00 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 adcq $665536, (%rax) +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - adcq $7, %rdi +# CHECK-NEXT: 0.67 0.67 0.67 1.00 1.00 1.00 1.00 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 adcq $7, (%rax) +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - adcq %rsi, %rdi +# CHECK-NEXT: 0.67 0.67 0.67 1.00 1.00 1.00 1.00 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 adcq %rsi, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 1.00 1.00 1.00 1.00 - - - - - - - - 
0.33 0.33 0.33 0.33 0.33 0.33 - - adcq (%rax), %rdi +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - addb $7, %al +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - addb $7, %dil +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 addb $7, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - addb %sil, %dil +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 addb %sil, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - addb (%rax), %dil +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - addw $511, %ax +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - addw $511, %di +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 addw $511, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - addw $7, %di +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 addw $7, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - addw %si, %di +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 addw %si, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - addw (%rax), %di +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - addl $665536, %eax +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - addl $665536, %edi +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 addl $665536, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - addl $7, %edi +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 
0.33 0.33 0.50 0.50 addl $7, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - addl %esi, %edi +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 addl %esi, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - addl (%rax), %edi +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - addq $665536, %rax +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - addq $665536, %rdi +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 addq $665536, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - addq $7, %rdi +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 addq $7, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - addq %rsi, %rdi +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 addq %rsi, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - addq (%rax), %rdi +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - andb $7, %al +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - andb $7, %dil +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 andb $7, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - andb %sil, %dil +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 andb %sil, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - andb (%rax), %dil +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - andw $511, %ax +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - 
- - - andw $511, %di +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 andw $511, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - andw $7, %di +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 andw $7, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - andw %si, %di +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 andw %si, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - andw (%rax), %di +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - andl $665536, %eax +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - andl $665536, %edi +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 andl $665536, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - andl $7, %edi +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 andl $7, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - andl %esi, %edi +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 andl %esi, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - andl (%rax), %edi +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - andq $665536, %rax +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - andq $665536, %rdi +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 andq $665536, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - andq $7, %rdi +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 
0.67 0.67 0.33 0.33 0.33 0.50 0.50 andq $7, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - andq %rsi, %rdi +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 andq %rsi, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - andq (%rax), %rdi +# CHECK-NEXT: - - - - 3.00 - - - - - - - - - - - - - - - - - - bsfw %si, %di +# CHECK-NEXT: - - - - 4.00 - - - - - - - - - - - - - - - - - - bsrw %si, %di +# CHECK-NEXT: 0.33 0.33 0.33 - 3.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - bsfw (%rax), %di +# CHECK-NEXT: 0.33 0.33 0.33 - 4.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - bsrw (%rax), %di +# CHECK-NEXT: - - - - 3.00 - - - - - - - - - - - - - - - - - - bsfl %esi, %edi +# CHECK-NEXT: - - - - 4.00 - - - - - - - - - - - - - - - - - - bsrl %esi, %edi +# CHECK-NEXT: 0.33 0.33 0.33 - 3.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - bsfl (%rax), %edi +# CHECK-NEXT: 0.33 0.33 0.33 - 4.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - bsrl (%rax), %edi +# CHECK-NEXT: - - - - 3.00 - - - - - - - - - - - - - - - - - - bsfq %rsi, %rdi +# CHECK-NEXT: - - - - 4.00 - - - - - - - - - - - - - - - - - - bsrq %rsi, %rdi +# CHECK-NEXT: 0.33 0.33 0.33 - 3.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - bsfq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 - 4.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - bsrq (%rax), %rdi +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - bswapl %eax +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - bswapq %rax +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - btw %si, %di +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - - - - - - - - - - - - btcw %si, %di +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - - - - - - - - - - - - btrw %si, %di +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - - - - - - - - - - - - btsw %si, 
%di +# CHECK-NEXT: 0.33 0.33 0.33 - 0.50 0.50 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - btw %si, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 btcw %si, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 btrw %si, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 btsw %si, (%rax) +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - btw $7, %di +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - - - - - - - - - - - - btcw $7, %di +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - - - - - - - - - - - - btrw $7, %di +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - - - - - - - - - - - - btsw $7, %di +# CHECK-NEXT: 0.33 0.33 0.33 - 0.50 0.50 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - btw $7, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 btcw $7, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 btrw $7, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 btsw $7, (%rax) +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - btl %esi, %edi +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - - - - - - - - - - - - btcl %esi, %edi +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - - - - - - - - - - - - btrl %esi, %edi +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - - - - - - - - - - - - btsl %esi, %edi +# CHECK-NEXT: 0.33 0.33 0.33 - 0.50 0.50 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - btl %esi, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 btcl %esi, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 btrl %esi, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 
0.33 0.33 0.50 0.50 btsl %esi, (%rax) +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - btl $7, %edi +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - - - - - - - - - - - - btcl $7, %edi +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - - - - - - - - - - - - btrl $7, %edi +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - - - - - - - - - - - - btsl $7, %edi +# CHECK-NEXT: 0.33 0.33 0.33 - 0.50 0.50 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - btl $7, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 btcl $7, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 btrl $7, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 btsl $7, (%rax) +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - btq %rsi, %rdi +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - - - - - - - - - - - - btcq %rsi, %rdi +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - - - - - - - - - - - - btrq %rsi, %rdi +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - - - - - - - - - - - - btsq %rsi, %rdi +# CHECK-NEXT: 0.33 0.33 0.33 - 0.50 0.50 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - btq %rsi, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 btcq %rsi, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 btrq %rsi, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 btsq %rsi, (%rax) +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - btq $7, %rdi +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - - - - - - - - - - - - btcq $7, %rdi +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - - - - - - - - - - - - btrq $7, %rdi +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - - - - - - - - - - - - btsq $7, %rdi +# CHECK-NEXT: 0.33 0.33 0.33 - 0.50 0.50 - - - - - - - - - 
0.33 0.33 0.33 0.33 0.33 0.33 - - btq $7, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 btcq $7, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 btrq $7, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 btsq $7, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - cbtw +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - cwtl +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - cltq +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - cwtd +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - cltd +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - cqto +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - clc +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - cld +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - cmc +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - cmpb $7, %al +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - cmpb $7, %dil +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmpb $7, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - cmpb %sil, %dil +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmpb %sil, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmpb (%rax), %dil +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - cmpw $511, %ax +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - cmpw $511, %di +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmpw $511, 
(%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - cmpw $7, %di +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmpw $7, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - cmpw %si, %di +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmpw %si, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmpw (%rax), %di +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - cmpl $665536, %eax +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - cmpl $665536, %edi +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmpl $665536, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - cmpl $7, %edi +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmpl $7, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - cmpl %esi, %edi +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmpl %esi, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmpl (%rax), %edi +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - cmpq $665536, %rax +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - cmpq $665536, %rdi +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmpq $665536, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - cmpq $7, %rdi +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmpq $7, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - cmpq %rsi, %rdi +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 
0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmpq %rsi, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmpq (%rax), %rdi +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - cmpsb %es:(%rdi), (%rsi) +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - cmpsw %es:(%rdi), (%rsi) +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - cmpsl %es:(%rdi), (%rsi) +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - cmpsq %es:(%rdi), (%rsi) +# CHECK-NEXT: - - - 3.00 3.00 3.00 3.00 - - - - - - - - - - - - - - - - cmpxchgb %cl, %bl +# CHECK-NEXT: 0.33 0.33 0.33 3.00 3.00 3.00 3.00 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - cmpxchgb %cl, (%rbx) +# CHECK-NEXT: - - - 3.00 3.00 3.00 3.00 - - - - - - - - - - - - - - - - cmpxchgw %cx, %bx +# CHECK-NEXT: - - - 3.00 3.00 3.00 3.00 - - - - - - - - - - - - - - - - cmpxchgw %cx, (%rbx) +# CHECK-NEXT: - - - 3.00 3.00 3.00 3.00 - - - - - - - - - - - - - - - - cmpxchgl %ecx, %ebx +# CHECK-NEXT: - - - 3.00 3.00 3.00 3.00 - - - - - - - - - - - - - - - - cmpxchgl %ecx, (%rbx) +# CHECK-NEXT: - - - 3.00 3.00 3.00 3.00 - - - - - - - - - - - - - - - - cmpxchgq %rcx, %rbx +# CHECK-NEXT: - - - 3.00 3.00 3.00 3.00 - - - - - - - - - - - - - - - - cmpxchgq %rcx, (%rbx) +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - cpuid +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - decb %dil +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 decb (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - decw %di +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 decw (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - decl %edi +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - 
- - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 decl (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - decq %rdi +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 decq (%rax) +# CHECK-NEXT: - - - 10.00 - - - - - - - - - - - - - - - - - - - divb %dil +# CHECK-NEXT: 0.33 0.33 0.33 10.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - divb (%rax) +# CHECK-NEXT: - - - 11.00 - - - - - - - - - - - - - - - - - - - divw %si +# CHECK-NEXT: 0.33 0.33 0.33 11.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - divw (%rax) +# CHECK-NEXT: - - - 13.00 - - - - - - - - - - - - - - - - - - - divl %edx +# CHECK-NEXT: 0.33 0.33 0.33 13.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - divl (%rax) +# CHECK-NEXT: - - - 17.00 - - - - - - - - - - - - - - - - - - - divq %rcx +# CHECK-NEXT: 0.33 0.33 0.33 17.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - divq (%rax) +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - enter $7, $4095 +# CHECK-NEXT: - - - 10.00 - - - - - - - - - - - - - - - - - - - idivb %dil +# CHECK-NEXT: 0.33 0.33 0.33 10.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - idivb (%rax) +# CHECK-NEXT: - - - 11.00 - - - - - - - - - - - - - - - - - - - idivw %si +# CHECK-NEXT: 0.33 0.33 0.33 11.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - idivw (%rax) +# CHECK-NEXT: - - - 13.00 - - - - - - - - - - - - - - - - - - - idivl %edx +# CHECK-NEXT: 0.33 0.33 0.33 13.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - idivl (%rax) +# CHECK-NEXT: - - - 17.00 - - - - - - - - - - - - - - - - - - - idivq %rcx +# CHECK-NEXT: 0.33 0.33 0.33 17.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - idivq (%rax) +# CHECK-NEXT: - - - - 3.00 - - - - - - - - - - - - - - - - - - imulb %dil +# CHECK-NEXT: 0.33 0.33 0.33 - 3.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - imulb (%rax) +# CHECK-NEXT: - - - - 
3.00 - - - - - - - - - - - - - - - - - - imulw %di +# CHECK-NEXT: 0.33 0.33 0.33 - 3.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - imulw (%rax) +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - - - - - - - imulw %si, %di +# CHECK-NEXT: 0.33 0.33 0.33 - 1.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - imulw (%rax), %di +# CHECK-NEXT: - - - - 4.00 - - - - - - - - - - - - - - - - - - imulw $511, %si, %di +# CHECK-NEXT: 0.33 0.33 0.33 - 4.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - imulw $511, (%rax), %di +# CHECK-NEXT: - - - - 4.00 - - - - - - - - - - - - - - - - - - imulw $7, %si, %di +# CHECK-NEXT: 0.33 0.33 0.33 - 4.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - imulw $7, (%rax), %di +# CHECK-NEXT: - - - - 3.00 - - - - - - - - - - - - - - - - - - imull %edi +# CHECK-NEXT: 0.33 0.33 0.33 - 3.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - imull (%rax) +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - - - - - - - imull %esi, %edi +# CHECK-NEXT: 0.33 0.33 0.33 - 1.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - imull (%rax), %edi +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - - - - - - - imull $665536, %esi, %edi +# CHECK-NEXT: 0.33 0.33 0.33 - 1.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - imull $665536, (%rax), %edi +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - - - - - - - imull $7, %esi, %edi +# CHECK-NEXT: 0.33 0.33 0.33 - 1.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - imull $7, (%rax), %edi +# CHECK-NEXT: - - - - 3.00 - - - - - - - - - - - - - - - - - - imulq %rdi +# CHECK-NEXT: 0.33 0.33 0.33 - 3.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - imulq (%rax) +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - - - - - - - imulq %rsi, %rdi +# CHECK-NEXT: 0.33 0.33 0.33 - 1.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - imulq (%rax), %rdi +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - - - - - - - imulq $665536, %rsi, %rdi +# CHECK-NEXT: 0.33 
0.33 0.33 - 1.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - imulq $665536, (%rax), %rdi +# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - - - - - - - imulq $7, %rsi, %rdi +# CHECK-NEXT: 0.33 0.33 0.33 - 1.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - imulq $7, (%rax), %rdi +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - inb $7, %al +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - inb %dx, %al +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - inw $7, %ax +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - inw %dx, %ax +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - inl $7, %eax +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - inl %dx, %eax +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - incb %dil +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 incb (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - incw %di +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 incw (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - incl %edi +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 incl (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - incq %rdi +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 incq (%rax) +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - insb %dx, %es:(%rdi) +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - insw %dx, %es:(%rdi) +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - insl %dx, %es:(%rdi) +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - 
- - - - - - int $7 +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - invlpg (%rax) +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - invlpga +# CHECK-NEXT: - - - - - - 1.00 - - - - - - - - - - - - - - - - lahf +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - leave +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - lodsb (%rsi), %al +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - lodsw (%rsi), %ax +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - lodsl (%rsi), %eax +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - lodsq (%rsi), %rax +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - movsb (%rsi), %es:(%rdi) +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - movsw (%rsi), %es:(%rdi) +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - movsl (%rsi), %es:(%rdi) +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - movsq (%rsi), %es:(%rdi) +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - movsbw %al, %di +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - movzbw %al, %di +# CHECK-NEXT: 1.00 1.00 1.00 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movsbw (%rax), %di +# CHECK-NEXT: 1.00 1.00 1.00 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movzbw (%rax), %di +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - movsbl %al, %edi +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - movzbl %al, %edi +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movsbl (%rax), %edi +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movzbl (%rax), %edi +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - 
- - - - - - - - - movsbq %al, %rdi +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - movzbq %al, %rdi +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movsbq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movzbq (%rax), %rdi +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - movswl %ax, %edi +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - movzwl %ax, %edi +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movswl (%rax), %edi +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movzwl (%rax), %edi +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - movswq %ax, %rdi +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - movzwq %ax, %rdi +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movswq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movzwq (%rax), %rdi +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - movslq %eax, %rdi +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movslq (%rax), %rdi +# CHECK-NEXT: - - - - 3.00 - - - - - - - - - - - - - - - - - - mulb %dil +# CHECK-NEXT: 0.33 0.33 0.33 - 3.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - mulb (%rax) +# CHECK-NEXT: - - - - 3.00 - - - - - - - - - - - - - - - - - - mulw %si +# CHECK-NEXT: 0.33 0.33 0.33 - 3.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - mulw (%rax) +# CHECK-NEXT: - - - - 3.00 - - - - - - - - - - - - - - - - - - mull %edx +# CHECK-NEXT: 0.33 0.33 0.33 - 3.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - mull (%rax) +# CHECK-NEXT: - - - - 3.00 - - - - - - - - - - - - - 
- - - - - mulq %rcx +# CHECK-NEXT: 0.33 0.33 0.33 - 3.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - mulq (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - negb %dil +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 negb (%r8) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - negw %si +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 negw (%r9) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - negl %edx +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 negl (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - negq %rcx +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 negq (%r10) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - nop +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - nopw %di +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - nopw (%rcx) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - nopl %esi +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - nopl (%r8) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - nopq %rdx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - nopq (%r9) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - notb %dil +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 notb (%r8) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - notw %si +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 notw (%r9) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - notl %edx +# CHECK-NEXT: 
0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 notl (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - notq %rcx +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 notq (%r10) +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - orb $7, %al +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - orb $7, %dil +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 orb $7, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - orb %sil, %dil +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 orb %sil, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - orb (%rax), %dil +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - orw $511, %ax +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - orw $511, %di +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 orw $511, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - orw $7, %di +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 orw $7, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - orw %si, %di +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 orw %si, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - orw (%rax), %di +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - orl $665536, %eax +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - orl $665536, %edi +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 
0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 orl $665536, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - orl $7, %edi +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 orl $7, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - orl %esi, %edi +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 orl %esi, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - orl (%rax), %edi +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - orq $665536, %rax +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - orq $665536, %rdi +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 orq $665536, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - orq $7, %rdi +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 orq $7, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - orq %rsi, %rdi +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 orq %rsi, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - orq (%rax), %rdi +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - outb %al, $7 +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - outb %al, %dx +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - outw %ax, $7 +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - outw %ax, %dx +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - outl %eax, $7 +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - outl %eax, %dx +# CHECK-NEXT: - 
- - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - outsb (%rsi), %dx +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - outsw (%rsi), %dx +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - outsl (%rsi), %dx +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - pause +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - - - - - - - - - - - - rclb %dil +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - - - - - - - - - - - - rcrb %dil +# CHECK-NEXT: 0.33 0.33 0.33 - 1.00 1.00 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - rclb (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 - 1.00 1.00 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - rcrb (%rax) +# CHECK-NEXT: - - - - 4.00 4.00 - - - - - - - - - - - - - - - - - rclb $7, %dil +# CHECK-NEXT: - - - - 3.00 3.00 - - - - - - - - - - - - - - - - - rcrb $7, %dil +# CHECK-NEXT: 0.33 0.33 0.33 - 4.00 4.00 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - rclb $7, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 - 4.00 4.00 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - rcrb $7, (%rax) +# CHECK-NEXT: - - - - 4.00 4.00 - - - - - - - - - - - - - - - - - rclb %cl, %dil +# CHECK-NEXT: - - - - 3.00 3.00 - - - - - - - - - - - - - - - - - rcrb %cl, %dil +# CHECK-NEXT: 0.33 0.33 0.33 - 4.00 4.00 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - rclb %cl, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 - 4.00 4.00 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - rcrb %cl, (%rax) +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - - - - - - - - - - - - rclw %di +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - - - - - - - - - - - - rcrw %di +# CHECK-NEXT: 0.33 0.33 0.33 - 1.00 1.00 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - rclw (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 - 1.00 1.00 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - rcrw (%rax) +# CHECK-NEXT: - - - - 4.00 4.00 - - - - - - - - - - - - - - - - - rclw $7, %di +# CHECK-NEXT: - - - - 3.00 3.00 - - - - - - - - - - - - - - - - - rcrw 
$7, %di +# CHECK-NEXT: 0.33 0.33 0.33 - 4.00 4.00 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - rclw $7, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 - 4.00 4.00 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - rcrw $7, (%rax) +# CHECK-NEXT: - - - - 4.00 4.00 - - - - - - - - - - - - - - - - - rclw %cl, %di +# CHECK-NEXT: - - - - 3.00 3.00 - - - - - - - - - - - - - - - - - rcrw %cl, %di +# CHECK-NEXT: 0.33 0.33 0.33 - 4.00 4.00 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - rclw %cl, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 - 4.00 4.00 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - rcrw %cl, (%rax) +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - - - - - - - - - - - - rcll %edi +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - - - - - - - - - - - - rcrl %edi +# CHECK-NEXT: 0.33 0.33 0.33 - 1.00 1.00 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - rcll (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 - 1.00 1.00 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - rcrl (%rax) +# CHECK-NEXT: - - - - 4.00 4.00 - - - - - - - - - - - - - - - - - rcll $7, %edi +# CHECK-NEXT: - - - - 3.00 3.00 - - - - - - - - - - - - - - - - - rcrl $7, %edi +# CHECK-NEXT: 0.33 0.33 0.33 - 4.00 4.00 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - rcll $7, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 - 4.00 4.00 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - rcrl $7, (%rax) +# CHECK-NEXT: - - - - 4.00 4.00 - - - - - - - - - - - - - - - - - rcll %cl, %edi +# CHECK-NEXT: - - - - 3.00 3.00 - - - - - - - - - - - - - - - - - rcrl %cl, %edi +# CHECK-NEXT: 0.33 0.33 0.33 - 4.00 4.00 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - rcll %cl, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 - 4.00 4.00 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - rcrl %cl, (%rax) +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - - - - - - - - - - - - rclq %rdi +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - - - - - - - - - - - - rcrq %rdi +# CHECK-NEXT: 0.33 0.33 0.33 - 1.00 1.00 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - rclq 
(%rax) +# CHECK-NEXT: 0.33 0.33 0.33 - 1.00 1.00 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - rcrq (%rax) +# CHECK-NEXT: - - - - 4.00 4.00 - - - - - - - - - - - - - - - - - rclq $7, %rdi +# CHECK-NEXT: - - - - 3.00 3.00 - - - - - - - - - - - - - - - - - rcrq $7, %rdi +# CHECK-NEXT: 0.33 0.33 0.33 - 4.00 4.00 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - rclq $7, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 - 4.00 4.00 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - rcrq $7, (%rax) +# CHECK-NEXT: - - - - 4.00 4.00 - - - - - - - - - - - - - - - - - rclq %cl, %rdi +# CHECK-NEXT: - - - - 3.00 3.00 - - - - - - - - - - - - - - - - - rcrq %cl, %rdi +# CHECK-NEXT: 0.33 0.33 0.33 - 4.00 4.00 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - rclq %cl, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 - 4.00 4.00 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - rcrq %cl, (%rax) +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - rdmsr +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - rdpmc +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - rdtsc +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - rdtscp +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - rolb %dil +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - rorb %dil +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 rolb (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 rorb (%rax) +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - rolb $7, %dil +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - rorb $7, %dil +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 rolb $7, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 rorb $7, (%rax) +# 
CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - rolb %cl, %dil +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - rorb %cl, %dil +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 rolb %cl, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 rorb %cl, (%rax) +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - rolw %di +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - rorw %di +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 rolw (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 rorw (%rax) +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - rolw $7, %di +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - rorw $7, %di +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 rolw $7, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 rorw $7, (%rax) +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - rolw %cl, %di +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - rorw %cl, %di +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 rolw %cl, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 rorw %cl, (%rax) +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - roll %edi +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - rorl %edi +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 roll (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 rorl (%rax) +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - 
- - roll $7, %edi +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - rorl $7, %edi +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 roll $7, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 rorl $7, (%rax) +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - roll %cl, %edi +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - rorl %cl, %edi +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 roll %cl, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 rorl %cl, (%rax) +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - rolq %rdi +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - rorq %rdi +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 rolq (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 rorq (%rax) +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - rolq $7, %rdi +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - rorq $7, %rdi +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 rolq $7, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 rorq $7, (%rax) +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - rolq %cl, %rdi +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - rorq %cl, %rdi +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 rolq %cl, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 rorq %cl, (%rax) +# CHECK-NEXT: - - - - - - 1.00 - - - - - - - - - - - - - - - - sahf +# CHECK-NEXT: - - - - 0.50 
0.50 - - - - - - - - - - - - - - - - - sarb %dil +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - shlb %dil +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - shrb %dil +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 sarb (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 shlb (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 shrb (%rax) +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - sarb $7, %dil +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - shlb $7, %dil +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - shrb $7, %dil +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 sarb $7, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 shlb $7, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 shrb $7, (%rax) +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - sarb %cl, %dil +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - shlb %cl, %dil +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - shrb %cl, %dil +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 sarb %cl, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 shlb %cl, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 shrb %cl, (%rax) +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - sarw %di +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - shlw %di +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - shrw %di +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - 
- - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 sarw (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 shlw (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 shrw (%rax) +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - sarw $7, %di +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - shlw $7, %di +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - shrw $7, %di +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 sarw $7, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 shlw $7, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 shrw $7, (%rax) +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - sarw %cl, %di +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - shlw %cl, %di +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - shrw %cl, %di +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 sarw %cl, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 shlw %cl, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 shrw %cl, (%rax) +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - sarl %edi +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - shll %edi +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - shrl %edi +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 sarl (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 shll (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 
0.67 0.33 0.33 0.33 0.50 0.50 shrl (%rax) +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - sarl $7, %edi +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - shll $7, %edi +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - shrl $7, %edi +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 sarl $7, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 shll $7, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 shrl $7, (%rax) +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - sarl %cl, %edi +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - shll %cl, %edi +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - shrl %cl, %edi +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 sarl %cl, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 shll %cl, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 shrl %cl, (%rax) +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - sarq %rdi +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - shlq %rdi +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - shrq %rdi +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 sarq (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 shlq (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 shrq (%rax) +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - sarq $7, %rdi +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - shlq $7, %rdi +# CHECK-NEXT: - - - - 0.50 0.50 - - - - 
- - - - - - - - - - - - - shrq $7, %rdi +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 sarq $7, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 shlq $7, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 shrq $7, (%rax) +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - sarq %cl, %rdi +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - shlq %cl, %rdi +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - shrq %cl, %rdi +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 sarq %cl, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 shlq %cl, (%rax) +# CHECK-NEXT: 0.67 0.67 0.67 - 0.50 0.50 - - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 shrq %cl, (%rax) +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - sbbb $0, %al +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - sbbb $0, %dil +# CHECK-NEXT: 0.67 0.67 0.67 1.00 1.00 1.00 1.00 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 sbbb $0, (%rax) +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - sbbb $7, %al +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - sbbb $7, %dil +# CHECK-NEXT: 0.67 0.67 0.67 1.00 1.00 1.00 1.00 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 sbbb $7, (%rax) +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - sbbb %sil, %dil +# CHECK-NEXT: 0.33 0.33 0.33 1.75 1.75 1.75 1.75 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 sbbb %sil, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 1.00 1.00 1.00 1.00 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - sbbb (%rax), %dil +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - sbbw $0, %ax +# 
CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - sbbw $0, %di +# CHECK-NEXT: 0.67 0.67 0.67 1.00 1.00 1.00 1.00 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 sbbw $0, (%rax) +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - sbbw $511, %ax +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - sbbw $511, %di +# CHECK-NEXT: 0.67 0.67 0.67 1.00 1.00 1.00 1.00 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 sbbw $511, (%rax) +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - sbbw $7, %di +# CHECK-NEXT: 0.67 0.67 0.67 1.00 1.00 1.00 1.00 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 sbbw $7, (%rax) +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - sbbw %si, %di +# CHECK-NEXT: 0.67 0.67 0.67 1.00 1.00 1.00 1.00 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 sbbw %si, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 1.00 1.00 1.00 1.00 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - sbbw (%rax), %di +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - sbbl $0, %eax +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - sbbl $0, %edi +# CHECK-NEXT: 0.67 0.67 0.67 1.00 1.00 1.00 1.00 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 sbbl $0, (%rax) +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - sbbl $665536, %eax +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - sbbl $665536, %edi +# CHECK-NEXT: 0.67 0.67 0.67 1.00 1.00 1.00 1.00 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 sbbl $665536, (%rax) +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - sbbl $7, %edi +# CHECK-NEXT: 0.67 0.67 0.67 1.00 1.00 1.00 1.00 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 sbbl $7, (%rax) +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - sbbl %esi, %edi +# CHECK-NEXT: 0.67 0.67 0.67 1.00 1.00 1.00 1.00 - - 
- - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 sbbl %esi, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 1.00 1.00 1.00 1.00 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - sbbl (%rax), %edi +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - sbbq $0, %rax +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - sbbq $0, %rdi +# CHECK-NEXT: 0.67 0.67 0.67 1.00 1.00 1.00 1.00 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 sbbq $0, (%rax) +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - sbbq $665536, %rax +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - sbbq $665536, %rdi +# CHECK-NEXT: 0.67 0.67 0.67 1.00 1.00 1.00 1.00 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 sbbq $665536, (%rax) +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - sbbq $7, %rdi +# CHECK-NEXT: 0.67 0.67 0.67 1.00 1.00 1.00 1.00 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 sbbq $7, (%rax) +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - sbbq %rsi, %rdi +# CHECK-NEXT: 0.67 0.67 0.67 1.00 1.00 1.00 1.00 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 sbbq %rsi, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 1.00 1.00 1.00 1.00 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - sbbq (%rax), %rdi +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - scasb %es:(%rdi), %al +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - scasw %es:(%rdi), %ax +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - scasl %es:(%rdi), %eax +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - scasq %es:(%rdi), %rax +# CHECK-NEXT: - - - 1.00 - - 1.00 - - - - - - - - - - - - - - - - seto %al +# CHECK-NEXT: 0.33 0.33 0.33 1.00 - - 1.00 - - - - - - - - 0.33 0.33 0.33 - - - 0.50 0.50 seto (%rax) +# CHECK-NEXT: - - - 1.00 - - 1.00 - - - - - - - - - - - - - - - - setno %al +# 
CHECK-NEXT: 0.33 0.33 0.33 1.00 - - 1.00 - - - - - - - - 0.33 0.33 0.33 - - - 0.50 0.50 setno (%rax) +# CHECK-NEXT: - - - 1.00 - - 1.00 - - - - - - - - - - - - - - - - setb %al +# CHECK-NEXT: 0.33 0.33 0.33 1.00 - - 1.00 - - - - - - - - 0.33 0.33 0.33 - - - 0.50 0.50 setb (%rax) +# CHECK-NEXT: - - - 1.00 - - 1.00 - - - - - - - - - - - - - - - - setae %al +# CHECK-NEXT: 0.33 0.33 0.33 1.00 - - 1.00 - - - - - - - - 0.33 0.33 0.33 - - - 0.50 0.50 setae (%rax) +# CHECK-NEXT: - - - 1.00 - - 1.00 - - - - - - - - - - - - - - - - sete %al +# CHECK-NEXT: 0.33 0.33 0.33 1.00 - - 1.00 - - - - - - - - 0.33 0.33 0.33 - - - 0.50 0.50 sete (%rax) +# CHECK-NEXT: - - - 1.00 - - 1.00 - - - - - - - - - - - - - - - - setne %al +# CHECK-NEXT: 0.33 0.33 0.33 1.00 - - 1.00 - - - - - - - - 0.33 0.33 0.33 - - - 0.50 0.50 setne (%rax) +# CHECK-NEXT: - - - 1.00 - - 1.00 - - - - - - - - - - - - - - - - seta %al +# CHECK-NEXT: 0.33 0.33 0.33 1.00 - - 1.00 - - - - - - - - 0.33 0.33 0.33 - - - 0.50 0.50 seta (%rax) +# CHECK-NEXT: - - - 1.00 - - 1.00 - - - - - - - - - - - - - - - - setbe %al +# CHECK-NEXT: 0.33 0.33 0.33 1.00 - - 1.00 - - - - - - - - 0.33 0.33 0.33 - - - 0.50 0.50 setbe (%rax) +# CHECK-NEXT: - - - 1.00 - - 1.00 - - - - - - - - - - - - - - - - sets %al +# CHECK-NEXT: 0.33 0.33 0.33 1.00 - - 1.00 - - - - - - - - 0.33 0.33 0.33 - - - 0.50 0.50 sets (%rax) +# CHECK-NEXT: - - - 1.00 - - 1.00 - - - - - - - - - - - - - - - - setns %al +# CHECK-NEXT: 0.33 0.33 0.33 1.00 - - 1.00 - - - - - - - - 0.33 0.33 0.33 - - - 0.50 0.50 setns (%rax) +# CHECK-NEXT: - - - 1.00 - - 1.00 - - - - - - - - - - - - - - - - setp %al +# CHECK-NEXT: 0.33 0.33 0.33 1.00 - - 1.00 - - - - - - - - 0.33 0.33 0.33 - - - 0.50 0.50 setp (%rax) +# CHECK-NEXT: - - - 1.00 - - 1.00 - - - - - - - - - - - - - - - - setnp %al +# CHECK-NEXT: 0.33 0.33 0.33 1.00 - - 1.00 - - - - - - - - 0.33 0.33 0.33 - - - 0.50 0.50 setnp (%rax) +# CHECK-NEXT: - - - 1.00 - - 1.00 - - - - - - - - - - - - - - - - setl %al +# CHECK-NEXT: 0.33 
0.33 0.33 1.00 - - 1.00 - - - - - - - - 0.33 0.33 0.33 - - - 0.50 0.50 setl (%rax) +# CHECK-NEXT: - - - 1.00 - - 1.00 - - - - - - - - - - - - - - - - setge %al +# CHECK-NEXT: 0.33 0.33 0.33 1.00 - - 1.00 - - - - - - - - 0.33 0.33 0.33 - - - 0.50 0.50 setge (%rax) +# CHECK-NEXT: - - - 1.00 - - 1.00 - - - - - - - - - - - - - - - - setg %al +# CHECK-NEXT: 0.33 0.33 0.33 1.00 - - 1.00 - - - - - - - - 0.33 0.33 0.33 - - - 0.50 0.50 setg (%rax) +# CHECK-NEXT: - - - 1.00 - - 1.00 - - - - - - - - - - - - - - - - setle %al +# CHECK-NEXT: 0.33 0.33 0.33 1.00 - - 1.00 - - - - - - - - 0.33 0.33 0.33 - - - 0.50 0.50 setle (%rax) +# CHECK-NEXT: - - - - 1.50 1.50 - - - - - - - - - - - - - - - - - shldw %cl, %si, %di +# CHECK-NEXT: - - - - 1.50 1.50 - - - - - - - - - - - - - - - - - shrdw %cl, %si, %di +# CHECK-NEXT: 0.33 0.33 0.33 - 2.00 2.00 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - shldw %cl, %si, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 - 2.00 2.00 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - shrdw %cl, %si, (%rax) +# CHECK-NEXT: - - - - 1.50 1.50 - - - - - - - - - - - - - - - - - shldw $7, %si, %di +# CHECK-NEXT: - - - - 1.50 1.50 - - - - - - - - - - - - - - - - - shrdw $7, %si, %di +# CHECK-NEXT: 0.33 0.33 0.33 - 2.00 2.00 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - shldw $7, %si, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 - 2.00 2.00 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - shrdw $7, %si, (%rax) +# CHECK-NEXT: - - - - 1.50 1.50 - - - - - - - - - - - - - - - - - shldl %cl, %esi, %edi +# CHECK-NEXT: - - - - 1.50 1.50 - - - - - - - - - - - - - - - - - shrdl %cl, %esi, %edi +# CHECK-NEXT: 0.33 0.33 0.33 - 2.00 2.00 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - shldl %cl, %esi, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 - 2.00 2.00 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - shrdl %cl, %esi, (%rax) +# CHECK-NEXT: - - - - 1.50 1.50 - - - - - - - - - - - - - - - - - shldl $7, %esi, %edi +# CHECK-NEXT: - - - - 1.50 1.50 - - - - - - - - - - - - - - 
- - - shrdl $7, %esi, %edi +# CHECK-NEXT: 0.33 0.33 0.33 - 2.00 2.00 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - shldl $7, %esi, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 - 2.00 2.00 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - shrdl $7, %esi, (%rax) +# CHECK-NEXT: - - - - 1.50 1.50 - - - - - - - - - - - - - - - - - shldq %cl, %rsi, %rdi +# CHECK-NEXT: - - - - 1.50 1.50 - - - - - - - - - - - - - - - - - shrdq %cl, %rsi, %rdi +# CHECK-NEXT: 0.33 0.33 0.33 - 2.00 2.00 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - shldq %cl, %rsi, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 - 2.00 2.00 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - shrdq %cl, %rsi, (%rax) +# CHECK-NEXT: - - - - 1.50 1.50 - - - - - - - - - - - - - - - - - shldq $7, %rsi, %rdi +# CHECK-NEXT: - - - - 1.50 1.50 - - - - - - - - - - - - - - - - - shrdq $7, %rsi, %rdi +# CHECK-NEXT: 0.33 0.33 0.33 - 2.00 2.00 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - shldq $7, %rsi, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 - 2.00 2.00 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - shrdq $7, %rsi, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - stc +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - std +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - stosb %al, %es:(%rdi) +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - stosw %ax, %es:(%rdi) +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - stosl %eax, %es:(%rdi) +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - stosq %rax, %es:(%rdi) +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - subb $7, %al +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - subb $7, %dil +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 subb $7, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - 
- - - subb %sil, %dil +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 subb %sil, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - subb (%rax), %dil +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - subw $511, %ax +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - subw $511, %di +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 subw $511, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - subw $7, %di +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 subw $7, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - subw %si, %di +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 subw %si, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - subw (%rax), %di +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - subl $665536, %eax +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - subl $665536, %edi +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 subl $665536, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - subl $7, %edi +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 subl $7, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - subl %esi, %edi +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 subl %esi, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - subl (%rax), %edi +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - 
- - - - - - - - - - - - subq $665536, %rax +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - subq $665536, %rdi +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 subq $665536, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - subq $7, %rdi +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 subq $7, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - subq %rsi, %rdi +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 subq %rsi, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - subq (%rax), %rdi +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - testb $7, %al +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - testb $7, %dil +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - testb $7, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - testb %sil, %dil +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - testb %sil, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - testw $511, %ax +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - testw $511, %di +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - testw $511, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - testw $7, %di +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - testw $7, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - testw %si, %di +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - testw 
%si, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - testl $665536, %eax +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - testl $665536, %edi +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - testl $665536, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - testl $7, %edi +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - testl $7, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - testl %esi, %edi +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - testl %esi, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - testq $665536, %rax +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - testq $665536, %rdi +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - testq $665536, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - testq $7, %rdi +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - testq $7, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - testq %rsi, %rdi +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - testq %rsi, (%rax) +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - ud2 +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - wrmsr +# CHECK-NEXT: - - - 2.00 2.00 2.00 2.00 - - - - - - - - - - - - - - - - xaddb %bl, %cl +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 xaddb %bl, (%rcx) +# CHECK-NEXT: - - - 2.00 2.00 2.00 2.00 - - - - - - - - - - - - - - - - xaddw %bx, %cx +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 
0.67 0.33 0.33 0.33 0.50 0.50 xaddw %ax, (%rbx) +# CHECK-NEXT: - - - 2.00 2.00 2.00 2.00 - - - - - - - - - - - - - - - - xaddl %ebx, %ecx +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 xaddl %eax, (%rbx) +# CHECK-NEXT: - - - 2.00 2.00 2.00 2.00 - - - - - - - - - - - - - - - - xaddq %rbx, %rcx +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 xaddq %rax, (%rbx) +# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - - - - - - xchgb %bl, %cl +# CHECK-NEXT: 0.33 0.33 0.33 0.50 0.50 0.50 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - xchgb %bl, (%rbx) +# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - - - - - - xchgw %bx, %ax +# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - - - - - - xchgw %bx, %cx +# CHECK-NEXT: 0.33 0.33 0.33 0.50 0.50 0.50 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - xchgw %ax, (%rbx) +# CHECK-NEXT: - - - 2.00 2.00 2.00 2.00 - - - - - - - - - - - - - - - - xchgl %ebx, %eax +# CHECK-NEXT: - - - 2.00 2.00 2.00 2.00 - - - - - - - - - - - - - - - - xchgl %ebx, %ecx +# CHECK-NEXT: 0.33 0.33 0.33 0.50 0.50 0.50 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - xchgl %eax, (%rbx) +# CHECK-NEXT: - - - 2.00 2.00 2.00 2.00 - - - - - - - - - - - - - - - - xchgq %rbx, %rax +# CHECK-NEXT: - - - 2.00 2.00 2.00 2.00 - - - - - - - - - - - - - - - - xchgq %rbx, %rcx +# CHECK-NEXT: 0.33 0.33 0.33 0.50 0.50 0.50 0.50 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - xchgq %rax, (%rbx) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - xlatb +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - xorb $7, %al +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - xorb $7, %dil +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 xorb $7, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 
0.25 0.25 - - - - - - - - - - - - - - - - xorb %sil, %dil +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 xorb %sil, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - xorb (%rax), %dil +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - xorw $511, %ax +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - xorw $511, %di +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 xorw $511, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - xorw $7, %di +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 xorw $7, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - xorw %si, %di +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 xorw %si, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - xorw (%rax), %di +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - xorl $665536, %eax +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - xorl $665536, %edi +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 xorl $665536, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - xorl $7, %edi +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 xorl $7, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - xorl %esi, %edi +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 xorl %esi, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - xorl (%rax), %edi +# 
CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - xorq $665536, %rax +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - xorq $665536, %rdi +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 xorq $665536, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - xorq $7, %rdi +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 xorq $7, (%rax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - xorq %rsi, %rdi +# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 xorq %rsi, (%rax) +# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - xorq (%rax), %rdi diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-x87.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-x87.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-x87.s @@ -0,0 +1,536 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -instruction-tables < %s | FileCheck %s + +f2xm1 + +fabs + +fadd %st, %st(1) +fadd %st(2) +fadds (%ecx) +faddl (%ecx) +faddp %st(1) +faddp %st(2) +fiadds (%ecx) +fiaddl (%ecx) + +fbld (%ecx) +fbstp (%eax) + +fchs + +fnclex + +fcmovb %st(1), %st +fcmovbe %st(1), %st +fcmove %st(1), %st +fcmovnb %st(1), %st +fcmovnbe %st(1), %st +fcmovne %st(1), %st +fcmovnu %st(1), %st +fcmovu %st(1), %st + +fcom %st(1) +fcom %st(3) +fcoms (%ecx) +fcoml (%eax) +fcomp %st(1) +fcomp %st(3) +fcomps (%ecx) +fcompl (%eax) +fcompp + +fcomi %st(3) +fcompi %st(3) + +fcos + +fdecstp + +fdiv %st, %st(1) +fdiv %st(2) +fdivs (%ecx) +fdivl (%eax) +fdivp %st(1) +fdivp %st(2) +fidivs (%ecx) +fidivl (%eax) + +fdivr %st, %st(1) +fdivr %st(2) +fdivrs (%ecx) +fdivrl (%eax) +fdivrp %st(1) +fdivrp %st(2) 
+fidivrs (%ecx) +fidivrl (%eax) + +ffree %st(0) + +ficoms (%ecx) +ficoml (%eax) +ficomps (%ecx) +ficompl (%eax) + +filds (%edx) +fildl (%ecx) +fildll (%eax) + +fincstp + +fninit + +fists (%edx) +fistl (%ecx) +fistps (%edx) +fistpl (%ecx) +fistpll (%eax) + +fisttps (%edx) +fisttpl (%ecx) +fisttpll (%eax) + +fld %st(0) +flds (%edx) +fldl (%ecx) +fldt (%eax) + +fldcw (%eax) +fldenv (%eax) + +fld1 +fldl2e +fldl2t +fldlg2 +fldln2 +fldpi +fldz + +fmul %st, %st(1) +fmul %st(2) +fmuls (%ecx) +fmull (%eax) +fmulp %st(1) +fmulp %st(2) +fimuls (%ecx) +fimull (%eax) + +fnop + +fpatan + +fprem +fprem1 + +fptan + +frndint + +frstor (%eax) + +fnsave (%eax) + +fscale + +fsin + +fsincos + +fsqrt + +fst %st(0) +fsts (%edx) +fstl (%ecx) +fstp %st(0) +fstpl (%edx) +fstpl (%ecx) +fstpt (%eax) + +fnstcw (%eax) +fnstenv (%eax) +fnstsw (%eax) + +frstor (%eax) +fsave (%eax) + +fsub %st, %st(1) +fsub %st(2) +fsubs (%ecx) +fsubl (%eax) +fsubp %st(1) +fsubp %st(2) +fisubs (%ecx) +fisubl (%eax) + +fsubr %st, %st(1) +fsubr %st(2) +fsubrs (%ecx) +fsubrl (%eax) +fsubrp %st(1) +fsubrp %st(2) +fisubrs (%ecx) +fisubrl (%eax) + +ftst + +fucom %st(1) +fucom %st(3) +fucomp %st(1) +fucomp %st(3) +fucompp + +fucomi %st(3) +fucompi %st(3) + +fwait + +fxam + +fxch %st(1) +fxch %st(3) + +fxrstor (%eax) +fxsave (%eax) + +fxtract + +fyl2x +fyl2xp1 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 100 100 25.00 U f2xm1 +# CHECK-NEXT: 1 1 1.00 U fabs +# CHECK-NEXT: 1 3 0.50 U fadd %st, %st(1) +# CHECK-NEXT: 1 3 0.50 U fadd %st(2), %st +# CHECK-NEXT: 1 10 0.50 * U fadds (%ecx) +# CHECK-NEXT: 1 10 0.50 * U faddl (%ecx) +# CHECK-NEXT: 1 3 0.50 U faddp %st, %st(1) +# CHECK-NEXT: 1 3 0.50 U faddp %st, %st(2) +# CHECK-NEXT: 2 5 6.00 * U fiadds (%ecx) +# CHECK-NEXT: 2 5 6.00 * U fiaddl (%ecx) +# 
CHECK-NEXT: 100 100 25.00 * U fbld (%ecx) +# CHECK-NEXT: 100 100 25.00 * U fbstp (%eax) +# CHECK-NEXT: 1 1 1.00 U fchs +# CHECK-NEXT: 100 100 25.00 U fnclex +# CHECK-NEXT: 7 7 7.00 U fcmovb %st(1), %st +# CHECK-NEXT: 7 7 7.00 U fcmovbe %st(1), %st +# CHECK-NEXT: 7 7 7.00 U fcmove %st(1), %st +# CHECK-NEXT: 7 7 7.00 U fcmovnb %st(1), %st +# CHECK-NEXT: 7 7 7.00 U fcmovnbe %st(1), %st +# CHECK-NEXT: 7 7 7.00 U fcmovne %st(1), %st +# CHECK-NEXT: 7 7 7.00 U fcmovnu %st(1), %st +# CHECK-NEXT: 7 7 7.00 U fcmovu %st(1), %st +# CHECK-NEXT: 1 3 1.00 U fcom %st(1) +# CHECK-NEXT: 1 3 1.00 U fcom %st(3) +# CHECK-NEXT: 1 10 1.00 * U fcoms (%ecx) +# CHECK-NEXT: 1 10 1.00 * U fcoml (%eax) +# CHECK-NEXT: 1 3 1.00 U fcomp %st(1) +# CHECK-NEXT: 1 3 1.00 U fcomp %st(3) +# CHECK-NEXT: 1 10 1.00 * U fcomps (%ecx) +# CHECK-NEXT: 1 10 1.00 * U fcompl (%eax) +# CHECK-NEXT: 100 100 25.00 U fcompp +# CHECK-NEXT: 1 3 1.00 U fcomi %st(3), %st +# CHECK-NEXT: 1 3 1.00 U fcompi %st(3), %st +# CHECK-NEXT: 100 100 25.00 U fcos +# CHECK-NEXT: 100 100 25.00 U fdecstp +# CHECK-NEXT: 1 11 3.00 U fdiv %st, %st(1) +# CHECK-NEXT: 1 11 3.00 U fdiv %st(2), %st +# CHECK-NEXT: 1 18 3.00 * U fdivs (%ecx) +# CHECK-NEXT: 1 18 3.00 * U fdivl (%eax) +# CHECK-NEXT: 1 11 3.00 U fdivp %st, %st(1) +# CHECK-NEXT: 1 11 3.00 U fdivp %st, %st(2) +# CHECK-NEXT: 2 5 15.50 * U fidivs (%ecx) +# CHECK-NEXT: 2 5 15.50 * U fidivl (%eax) +# CHECK-NEXT: 1 11 3.00 U fdivr %st, %st(1) +# CHECK-NEXT: 1 11 3.00 U fdivr %st(2), %st +# CHECK-NEXT: 1 18 3.00 * U fdivrs (%ecx) +# CHECK-NEXT: 1 18 3.00 * U fdivrl (%eax) +# CHECK-NEXT: 1 11 3.00 U fdivrp %st, %st(1) +# CHECK-NEXT: 1 11 3.00 U fdivrp %st, %st(2) +# CHECK-NEXT: 2 5 15.50 * U fidivrs (%ecx) +# CHECK-NEXT: 2 5 15.50 * U fidivrl (%eax) +# CHECK-NEXT: 100 100 25.00 U ffree %st(0) +# CHECK-NEXT: 1 10 1.00 * U ficoms (%ecx) +# CHECK-NEXT: 1 10 1.00 * U ficoml (%eax) +# CHECK-NEXT: 1 10 1.00 * U ficomps (%ecx) +# CHECK-NEXT: 1 10 1.00 * U ficompl (%eax) +# CHECK-NEXT: 1 5 0.33 * U 
filds (%edx) +# CHECK-NEXT: 1 5 0.33 * U fildl (%ecx) +# CHECK-NEXT: 1 5 0.33 * U fildll (%eax) +# CHECK-NEXT: 100 100 25.00 U fincstp +# CHECK-NEXT: 100 100 25.00 U fninit +# CHECK-NEXT: 1 1 1.00 * U fists (%edx) +# CHECK-NEXT: 1 1 1.00 * U fistl (%ecx) +# CHECK-NEXT: 1 1 1.00 * U fistps (%edx) +# CHECK-NEXT: 1 1 1.00 * U fistpl (%ecx) +# CHECK-NEXT: 1 1 1.00 * U fistpll (%eax) +# CHECK-NEXT: 1 1 1.00 * U fisttps (%edx) +# CHECK-NEXT: 1 1 1.00 * U fisttpl (%ecx) +# CHECK-NEXT: 1 1 1.00 * U fisttpll (%eax) +# CHECK-NEXT: 1 1 1.00 U fld %st(0) +# CHECK-NEXT: 1 5 0.33 * U flds (%edx) +# CHECK-NEXT: 1 5 0.33 * U fldl (%ecx) +# CHECK-NEXT: 1 5 0.33 * U fldt (%eax) +# CHECK-NEXT: 1 5 0.33 * U fldcw (%eax) +# CHECK-NEXT: 100 100 25.00 * U fldenv (%eax) +# CHECK-NEXT: 1 11 1.00 U fld1 +# CHECK-NEXT: 1 11 1.00 U fldl2e +# CHECK-NEXT: 1 11 1.00 U fldl2t +# CHECK-NEXT: 1 11 1.00 U fldlg2 +# CHECK-NEXT: 1 11 1.00 U fldln2 +# CHECK-NEXT: 1 11 1.00 U fldpi +# CHECK-NEXT: 1 8 1.00 U fldz +# CHECK-NEXT: 1 3 0.50 U fmul %st, %st(1) +# CHECK-NEXT: 1 3 0.50 U fmul %st(2), %st +# CHECK-NEXT: 1 10 0.50 * U fmuls (%ecx) +# CHECK-NEXT: 1 10 0.50 * U fmull (%eax) +# CHECK-NEXT: 1 3 0.50 U fmulp %st, %st(1) +# CHECK-NEXT: 1 3 0.50 U fmulp %st, %st(2) +# CHECK-NEXT: 2 5 6.00 * U fimuls (%ecx) +# CHECK-NEXT: 2 5 6.00 * U fimull (%eax) +# CHECK-NEXT: 1 0 0.25 U fnop +# CHECK-NEXT: 100 100 25.00 U fpatan +# CHECK-NEXT: 100 100 25.00 U fprem +# CHECK-NEXT: 100 100 25.00 U fprem1 +# CHECK-NEXT: 100 100 25.00 U fptan +# CHECK-NEXT: 100 100 25.00 U frndint +# CHECK-NEXT: 100 100 25.00 * U frstor (%eax) +# CHECK-NEXT: 100 100 25.00 * U fnsave (%eax) +# CHECK-NEXT: 100 100 25.00 U fscale +# CHECK-NEXT: 100 100 25.00 U fsin +# CHECK-NEXT: 100 100 25.00 U fsincos +# CHECK-NEXT: 1 22 23.00 U fsqrt +# CHECK-NEXT: 1 1 1.00 U fst %st(0) +# CHECK-NEXT: 1 1 1.00 * U fsts (%edx) +# CHECK-NEXT: 1 1 1.00 * U fstl (%ecx) +# CHECK-NEXT: 1 1 1.00 U fstp %st(0) +# CHECK-NEXT: 1 1 1.00 * U fstpl (%edx) +# 
CHECK-NEXT: 1 1 1.00 * U fstpl (%ecx) +# CHECK-NEXT: 1 1 1.00 * U fstpt (%eax) +# CHECK-NEXT: 1 1 0.25 * U fnstcw (%eax) +# CHECK-NEXT: 100 100 25.00 * U fnstenv (%eax) +# CHECK-NEXT: 100 100 25.00 * U fnstsw (%eax) +# CHECK-NEXT: 100 100 25.00 * U frstor (%eax) +# CHECK-NEXT: 100 100 25.00 U wait +# CHECK-NEXT: 100 100 25.00 * U fnsave (%eax) +# CHECK-NEXT: 1 3 0.50 U fsub %st, %st(1) +# CHECK-NEXT: 1 3 0.50 U fsub %st(2), %st +# CHECK-NEXT: 1 10 0.50 * U fsubs (%ecx) +# CHECK-NEXT: 1 10 0.50 * U fsubl (%eax) +# CHECK-NEXT: 1 3 0.50 U fsubp %st, %st(1) +# CHECK-NEXT: 1 3 0.50 U fsubp %st, %st(2) +# CHECK-NEXT: 2 5 6.00 * U fisubs (%ecx) +# CHECK-NEXT: 2 5 6.00 * U fisubl (%eax) +# CHECK-NEXT: 1 3 0.50 U fsubr %st, %st(1) +# CHECK-NEXT: 1 3 0.50 U fsubr %st(2), %st +# CHECK-NEXT: 1 10 0.50 * U fsubrs (%ecx) +# CHECK-NEXT: 1 10 0.50 * U fsubrl (%eax) +# CHECK-NEXT: 1 3 0.50 U fsubrp %st, %st(1) +# CHECK-NEXT: 1 3 0.50 U fsubrp %st, %st(2) +# CHECK-NEXT: 2 5 6.00 * U fisubrs (%ecx) +# CHECK-NEXT: 2 5 6.00 * U fisubrl (%eax) +# CHECK-NEXT: 1 3 1.00 U ftst +# CHECK-NEXT: 1 3 1.00 U fucom %st(1) +# CHECK-NEXT: 1 3 1.00 U fucom %st(3) +# CHECK-NEXT: 1 3 1.00 U fucomp %st(1) +# CHECK-NEXT: 1 3 1.00 U fucomp %st(3) +# CHECK-NEXT: 1 3 1.00 U fucompp +# CHECK-NEXT: 1 3 1.00 U fucomi %st(3), %st +# CHECK-NEXT: 1 3 1.00 U fucompi %st(3), %st +# CHECK-NEXT: 100 100 25.00 U wait +# CHECK-NEXT: 100 100 25.00 U fxam +# CHECK-NEXT: 1 1 1.00 U fxch %st(1) +# CHECK-NEXT: 1 1 1.00 U fxch %st(3) +# CHECK-NEXT: 100 100 25.00 * * U fxrstor (%eax) +# CHECK-NEXT: 100 100 25.00 * * U fxsave (%eax) +# CHECK-NEXT: 100 100 25.00 U fxtract +# CHECK-NEXT: 100 100 25.00 U fyl2x +# CHECK-NEXT: 100 100 25.00 U fyl2xp1 + +# CHECK: Resources: +# CHECK-NEXT: [0] - Zn3AGU0 +# CHECK-NEXT: [1] - Zn3AGU1 +# CHECK-NEXT: [2] - Zn3AGU2 +# CHECK-NEXT: [3] - Zn3ALU0 +# CHECK-NEXT: [4] - Zn3ALU1 +# CHECK-NEXT: [5] - Zn3ALU2 +# CHECK-NEXT: [6] - Zn3ALU3 +# CHECK-NEXT: [7] - Zn3BRU1 +# CHECK-NEXT: [8] - Zn3FPP0 
+# CHECK-NEXT: [9] - Zn3FPP1 +# CHECK-NEXT: [10] - Zn3FPP2 +# CHECK-NEXT: [11] - Zn3FPP3 +# CHECK-NEXT: [12.0] - Zn3FPP45 +# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [13] - Zn3FPSt +# CHECK-NEXT: [14.0] - Zn3LSU +# CHECK-NEXT: [14.1] - Zn3LSU +# CHECK-NEXT: [14.2] - Zn3LSU +# CHECK-NEXT: [15.0] - Zn3Load +# CHECK-NEXT: [15.1] - Zn3Load +# CHECK-NEXT: [15.2] - Zn3Load +# CHECK-NEXT: [16.0] - Zn3Store +# CHECK-NEXT: [16.1] - Zn3Store + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] +# CHECK-NEXT: 10.67 10.67 10.67 886.50 886.50 886.50 886.50 - 137.00 203.00 119.00 119.00 13.50 13.50 - 24.00 24.00 24.00 15.33 15.33 15.33 13.00 13.00 + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - f2xm1 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - fabs +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - fadd %st, %st(1) +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - fadd %st(2), %st +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - fadds (%ecx) +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - faddl (%ecx) +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - faddp %st, %st(1) +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - faddp %st, %st(2) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 6.00 6.00 6.00 6.00 - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - fiadds (%ecx) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 6.00 6.00 6.00 6.00 - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - fiaddl (%ecx) +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - fbld (%ecx) +# 
CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - fbstp (%eax) +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - fchs +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - fnclex +# CHECK-NEXT: - - - 7.00 7.00 7.00 7.00 - - - - - - - - - - - - - - - - fcmovb %st(1), %st +# CHECK-NEXT: - - - 7.00 7.00 7.00 7.00 - - - - - - - - - - - - - - - - fcmovbe %st(1), %st +# CHECK-NEXT: - - - 7.00 7.00 7.00 7.00 - - - - - - - - - - - - - - - - fcmove %st(1), %st +# CHECK-NEXT: - - - 7.00 7.00 7.00 7.00 - - - - - - - - - - - - - - - - fcmovnb %st(1), %st +# CHECK-NEXT: - - - 7.00 7.00 7.00 7.00 - - - - - - - - - - - - - - - - fcmovnbe %st(1), %st +# CHECK-NEXT: - - - 7.00 7.00 7.00 7.00 - - - - - - - - - - - - - - - - fcmovne %st(1), %st +# CHECK-NEXT: - - - 7.00 7.00 7.00 7.00 - - - - - - - - - - - - - - - - fcmovnu %st(1), %st +# CHECK-NEXT: - - - 7.00 7.00 7.00 7.00 - - - - - - - - - - - - - - - - fcmovu %st(1), %st +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - fcom %st(1) +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - fcom %st(3) +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - fcoms (%ecx) +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - fcoml (%eax) +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - fcomp %st(1) +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - fcomp %st(3) +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - fcomps (%ecx) +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - fcompl (%eax) +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - fcompp +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - fcomi %st(3), %st +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - fcompi %st(3), %st +# CHECK-NEXT: - - - 
25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - fcos +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - fdecstp +# CHECK-NEXT: - - - - - - - - - 3.00 - - - - - - - - - - - - - fdiv %st, %st(1) +# CHECK-NEXT: - - - - - - - - - 3.00 - - - - - - - - - - - - - fdiv %st(2), %st +# CHECK-NEXT: - - - - - - - - - 3.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - fdivs (%ecx) +# CHECK-NEXT: - - - - - - - - - 3.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - fdivl (%eax) +# CHECK-NEXT: - - - - - - - - - 3.00 - - - - - - - - - - - - - fdivp %st, %st(1) +# CHECK-NEXT: - - - - - - - - - 3.00 - - - - - - - - - - - - - fdivp %st, %st(2) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 15.50 15.50 15.50 15.50 - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - fidivs (%ecx) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 15.50 15.50 15.50 15.50 - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - fidivl (%eax) +# CHECK-NEXT: - - - - - - - - - 3.00 - - - - - - - - - - - - - fdivr %st, %st(1) +# CHECK-NEXT: - - - - - - - - - 3.00 - - - - - - - - - - - - - fdivr %st(2), %st +# CHECK-NEXT: - - - - - - - - - 3.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - fdivrs (%ecx) +# CHECK-NEXT: - - - - - - - - - 3.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - fdivrl (%eax) +# CHECK-NEXT: - - - - - - - - - 3.00 - - - - - - - - - - - - - fdivrp %st, %st(1) +# CHECK-NEXT: - - - - - - - - - 3.00 - - - - - - - - - - - - - fdivrp %st, %st(2) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 15.50 15.50 15.50 15.50 - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - fidivrs (%ecx) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 15.50 15.50 15.50 15.50 - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - fidivrl (%eax) +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - ffree %st(0) +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - ficoms (%ecx) +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - ficoml (%eax) +# CHECK-NEXT: - 
- - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - ficomps (%ecx) +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - ficompl (%eax) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - filds (%edx) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - fildl (%ecx) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - fildll (%eax) +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - fincstp +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - fninit +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 fists (%edx) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 fistl (%ecx) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 fistps (%edx) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 fistpl (%ecx) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 fistpll (%eax) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 fisttps (%edx) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 fisttpl (%ecx) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 fisttpll (%eax) +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - fld %st(0) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - flds (%edx) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - fldl (%ecx) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - fldt (%eax) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - fldcw (%eax) +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - 
- - - - - - - - - - - - - fldenv (%eax) +# CHECK-NEXT: - - - - - - - - - 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - fld1 +# CHECK-NEXT: - - - - - - - - - 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - fldl2e +# CHECK-NEXT: - - - - - - - - - 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - fldl2t +# CHECK-NEXT: - - - - - - - - - 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - fldlg2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - fldln2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - fldpi +# CHECK-NEXT: - - - - - - - - - 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - fldz +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - fmul %st, %st(1) +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - fmul %st(2), %st +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - fmuls (%ecx) +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - fmull (%eax) +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - fmulp %st, %st(1) +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - - - - - - - - - fmulp %st, %st(2) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 6.00 6.00 6.00 6.00 - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - fimuls (%ecx) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 6.00 6.00 6.00 6.00 - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - fimull (%eax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - fnop +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - fpatan +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - fprem +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - fprem1 +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - fptan +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - frndint +# CHECK-NEXT: - - - 25.00 
25.00 25.00 25.00 - - - - - - - - - - - - - - - - frstor (%eax) +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - fnsave (%eax) +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - fscale +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - fsin +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - fsincos +# CHECK-NEXT: - - - - - - - - - 23.00 - - - - - - - - - - - - - fsqrt +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - fst %st(0) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 fsts (%edx) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 fstl (%ecx) +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - fstp %st(0) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 fstpl (%edx) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 fstpl (%ecx) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.67 0.67 0.67 - - - 1.00 1.00 fstpt (%eax) +# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - fnstcw (%eax) +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - fnstenv (%eax) +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - fnstsw (%eax) +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - frstor (%eax) +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - wait +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - fnsave (%eax) +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - fsub %st, %st(1) +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - fsub %st(2), %st +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - fsubs (%ecx) +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 
0.33 0.33 0.33 0.33 0.33 0.33 - - fsubl (%eax) +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - fsubp %st, %st(1) +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - fsubp %st, %st(2) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 6.00 6.00 6.00 6.00 - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - fisubs (%ecx) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 6.00 6.00 6.00 6.00 - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - fisubl (%eax) +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - fsubr %st, %st(1) +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - fsubr %st(2), %st +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - fsubrs (%ecx) +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - fsubrl (%eax) +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - fsubrp %st, %st(1) +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - fsubrp %st, %st(2) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 6.00 6.00 6.00 6.00 - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - fisubrs (%ecx) +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 6.00 6.00 6.00 6.00 - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - fisubrl (%eax) +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - ftst +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - fucom %st(1) +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - fucom %st(3) +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - fucomp %st(1) +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - fucomp %st(3) +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - fucompp +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - fucomi %st(3), %st +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - - - - - - - - - - fucompi %st(3), %st +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - wait +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - 
- - - - - - - - - - - - - - fxam +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - fxch %st(1) +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - fxch %st(3) +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - fxrstor (%eax) +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - fxsave (%eax) +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - fxtract +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - fyl2x +# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - fyl2xp1 diff --git a/llvm/test/tools/llvm-mca/X86/cpus.s b/llvm/test/tools/llvm-mca/X86/cpus.s --- a/llvm/test/tools/llvm-mca/X86/cpus.s +++ b/llvm/test/tools/llvm-mca/X86/cpus.s @@ -4,6 +4,7 @@ # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefixes=ALL,BTVER2 %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=znver1 -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefixes=ALL,ZNVER1 %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=znver2 -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefixes=ALL,ZNVER2 %s +# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=znver3 -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefixes=ALL,ZNVER3 %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefixes=ALL,SANDYBRIDGE %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=ivybridge -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefixes=ALL,IVYBRIDGE %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefixes=ALL,HASWELL %s @@ -84,3 +85,8 @@ # 
ZNVER2-NEXT: uOps Per Cycle: 0.97 # ZNVER2-NEXT: IPC: 0.97 # ZNVER2-NEXT: Block RThroughput: 0.3 + +# ZNVER3: Dispatch Width: 6 +# ZNVER3-NEXT: uOps Per Cycle: 0.97 +# ZNVER3-NEXT: IPC: 0.97 +# ZNVER3-NEXT: Block RThroughput: 0.3 diff --git a/llvm/test/tools/llvm-mca/X86/in-order-cpu.s b/llvm/test/tools/llvm-mca/X86/in-order-cpu.s --- a/llvm/test/tools/llvm-mca/X86/in-order-cpu.s +++ b/llvm/test/tools/llvm-mca/X86/in-order-cpu.s @@ -1,3 +1,5 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=atom -o /dev/null 2>&1 | FileCheck %s -# CHECK: warning: support for in-order CPU 'atom' is experimental. movsbw %al, %di + +# CHECK: warning: support for in-order CPU 'atom' is experimental. diff --git a/llvm/test/tools/llvm-mca/X86/read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/read-after-ld-1.s --- a/llvm/test/tools/llvm-mca/X86/read-after-ld-1.s +++ b/llvm/test/tools/llvm-mca/X86/read-after-ld-1.s @@ -8,6 +8,7 @@ # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefixes=ALL,BTVER2 # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefixes=ALL,ZNVER1 # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefixes=ALL,ZNVER2 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefixes=ALL,ZNVER3 vdivps %xmm0, %xmm1, %xmm1 vaddps (%rax), %xmm1, %xmm1 @@ -42,6 +43,9 @@ # ZNVER2-NEXT: Total Cycles: 21 # ZNVER2-NEXT: Total uOps: 2 +# ZNVER3-NEXT: Total Cycles: 17 +# ZNVER3-NEXT: Total uOps: 2 + # BARCELONA: Dispatch Width: 4 # BARCELONA-NEXT: uOps Per Cycle: 
0.15 # BARCELONA-NEXT: IPC: 0.10 @@ -87,6 +91,11 @@ # ZNVER2-NEXT: IPC: 0.10 # ZNVER2-NEXT: Block RThroughput: 1.0 +# ZNVER3: Dispatch Width: 6 +# ZNVER3-NEXT: uOps Per Cycle: 0.12 +# ZNVER3-NEXT: IPC: 0.12 +# ZNVER3-NEXT: Block RThroughput: 3.0 + # ALL: Timeline view: # BARCELONA-NEXT: 0123456789 @@ -116,6 +125,9 @@ # ZNVER2-NEXT: 0123456789 # ZNVER2-NEXT: Index 0123456789 0 +# ZNVER3-NEXT: 0123456 +# ZNVER3-NEXT: Index 0123456789 + # BARCELONA: [0,0] DeeeeeeeeeeeeeeER . vdivps %xmm0, %xmm1, %xmm1 # BARCELONA-NEXT: [0,1] D========eeeeeeeeeER vaddps (%rax), %xmm1, %xmm1 @@ -143,6 +155,9 @@ # ZNVER2: [0,0] DeeeeeeeeeeeeeeeER . vdivps %xmm0, %xmm1, %xmm1 # ZNVER2-NEXT: [0,1] D========eeeeeeeeeeER vaddps (%rax), %xmm1, %xmm1 +# ZNVER3: [0,0] DeeeeeeeeeeeER .. vdivps %xmm0, %xmm1, %xmm1 +# ZNVER3-NEXT: [0,1] D====eeeeeeeeeeER vaddps (%rax), %xmm1, %xmm1 + # ALL: Average Wait times (based on the timeline view): # ALL-NEXT: [0]: Executions # ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue @@ -178,3 +193,6 @@ # ZNVER2-NEXT: 1. 1 9.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1 # ZNVER2-NEXT: 1 5.0 0.5 0.0 + +# ZNVER3-NEXT: 1. 
1 5.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1 +# ZNVER3-NEXT: 1 3.0 0.5 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/register-file-statistics.s b/llvm/test/tools/llvm-mca/X86/register-file-statistics.s --- a/llvm/test/tools/llvm-mca/X86/register-file-statistics.s +++ b/llvm/test/tools/llvm-mca/X86/register-file-statistics.s @@ -5,6 +5,7 @@ # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -all-stats=false -all-views=false -register-file-stats < %s | FileCheck --check-prefixes=ALL,BTVER2 %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -all-stats=false -all-views=false -register-file-stats < %s | FileCheck --check-prefixes=ALL,ZNVER1 %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=1 -all-stats=false -all-views=false -register-file-stats < %s | FileCheck --check-prefixes=ALL,ZNVER2 %s +# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=znver3 -iterations=1 -all-stats=false -all-views=false -register-file-stats < %s | FileCheck --check-prefixes=ALL,ZNVER3 %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -iterations=1 -all-stats=false -all-views=false -register-file-stats < %s | FileCheck --check-prefixes=ALL %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=ivybridge -iterations=1 -all-stats=false -all-views=false -register-file-stats < %s | FileCheck --check-prefixes=ALL %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -iterations=1 -all-stats=false -all-views=false -register-file-stats < %s | FileCheck --check-prefixes=ALL %s @@ -44,6 +45,11 @@ # ZNVER2-NEXT: Total number of mappings created: 0 # ZNVER2-NEXT: Max number of mappings used: 0 +# ZNVER3: * Register File #1 -- Zn3FpPRF: +# ZNVER3-NEXT: Number of physical registers: 160 +# ZNVER3-NEXT: Total number of mappings created: 0 +# ZNVER3-NEXT: Max number of mappings used: 0 + # BDVER2: * Register File #2 -- PdIntegerPRF: # BDVER2-NEXT: Number of physical registers: 96 
# BDVER2-NEXT: Total number of mappings created: 2 @@ -63,3 +69,8 @@ # ZNVER2-NEXT: Number of physical registers: 168 # ZNVER2-NEXT: Total number of mappings created: 2 # ZNVER2-NEXT: Max number of mappings used: 2 + +# ZNVER3: * Register File #2 -- Zn3IntegerPRF: +# ZNVER3-NEXT: Number of physical registers: 192 +# ZNVER3-NEXT: Total number of mappings created: 2 +# ZNVER3-NEXT: Max number of mappings used: 2 diff --git a/llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s b/llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s --- a/llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s +++ b/llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s @@ -4,6 +4,7 @@ # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,BTVER2 %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,ZNVER1 %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,ZNVER2 %s +# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=znver3 -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,ZNVER3 %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,SNB %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=ivybridge -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,IVB %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,HSW %s @@ -95,6 +96,12 @@ # ZNVER2-NEXT: [3] Maximum number of used buffer entries. 
# ZNVER2-NEXT: [4] Total number of buffer entries. +# ZNVER3: Scheduler's queue usage: +# ZNVER3-NEXT: [1] Resource name. +# ZNVER3-NEXT: [2] Average number of used buffer entries. +# ZNVER3-NEXT: [3] Maximum number of used buffer entries. +# ZNVER3-NEXT: [4] Total number of buffer entries. + # BARCELONA: [1] [2] [3] [4] # BARCELONA-NEXT: SBPortAny 0 1 54 @@ -139,3 +146,9 @@ # ZNVER2-NEXT: Zn2AGU 0 0 28 # ZNVER2-NEXT: Zn2ALU 0 1 64 # ZNVER2-NEXT: Zn2FPU 0 0 36 + +# ZNVER3: [1] [2] [3] [4] +# ZNVER3-NEXT: Zn3FP 0 0 64 +# ZNVER3-NEXT: Zn3Int 0 1 96 +# ZNVER3-NEXT: Zn3Load 0 0 72 +# ZNVER3-NEXT: Zn3Store 0 0 64 diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/CMakeLists.txt b/mlir/include/mlir/Dialect/SparseTensor/IR/CMakeLists.txt --- a/mlir/include/mlir/Dialect/SparseTensor/IR/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/CMakeLists.txt @@ -1,2 +1,7 @@ add_mlir_dialect(SparseTensorOps sparse_tensor) add_mlir_doc(SparseTensorOps SparseTensorOps Dialects/ -gen-dialect-doc) + +set(LLVM_TARGET_DEFINITIONS SparseTensorAttrDefs.td) +mlir_tablegen(SparseTensorAttrDefs.h.inc -gen-attrdef-decls) +mlir_tablegen(SparseTensorAttrDefs.cpp.inc -gen-attrdef-defs) +add_public_tablegen_target(MLIRSparseTensorAttrDefsIncGen) diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h --- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h @@ -13,8 +13,12 @@ #include "mlir/IR/Dialect.h" #include "mlir/IR/OpDefinition.h" #include "mlir/IR/OpImplementation.h" +#include "mlir/IR/TensorEncoding.h" #include "mlir/Interfaces/SideEffectInterfaces.h" +#define GET_ATTRDEF_CLASSES +#include "mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.h.inc" + #define GET_OP_CLASSES #include "mlir/Dialect/SparseTensor/IR/SparseTensorOps.h.inc" diff --git a/mlir/include/mlir/Dialect/Tensor/IR/TensorAttrDefs.td 
b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td rename from mlir/include/mlir/Dialect/Tensor/IR/TensorAttrDefs.td rename to mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td --- a/mlir/include/mlir/Dialect/Tensor/IR/TensorAttrDefs.td +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td @@ -1,4 +1,4 @@ -//===-- TensorAttrDefs.td - Tensor Attributes Definitions --*- tablegen -*-===// +//===-- SparseTensorAttrDefs.td - attributes definitions ---*- tablegen -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,77 +6,75 @@ // //===----------------------------------------------------------------------===// -#ifndef TENSOR_ATTRDEFS -#define TENSOR_ATTRDEFS +#ifndef SPARSETENSOR_ATTRDEFS +#define SPARSETENSOR_ATTRDEFS -include "mlir/Dialect/Tensor/IR/TensorBase.td" +include "mlir/Dialect/SparseTensor/IR/SparseTensorBase.td" include "mlir/IR/TensorEncoding.td" // All of the Tensor attributes will extend this class. -class Tensor_Attr traits = []> : AttrDef; +class SparseTensor_Attr traits = []> + : AttrDef; // Sparse tensor encoding attribute. -def SparseTensorEncodingAttr : Tensor_Attr<"SparseTensorEncoding", +def SparseTensorEncodingAttr : SparseTensor_Attr<"SparseTensorEncoding", [ DeclareAttrInterfaceMethods ] > { - let mnemonic = "sparse"; + let mnemonic = "encoding"; let description = [{ An attribute to encode "TACO"-style information (see tensor-compiler.org) - on the sparsity of tensors. The semantics are defined by means of the - methods getDimLevelType(), getDimOrdering(), getPointerType(), and - getIndexType(), documented below. The encoding is eventually used by - a `sparse compiler` pass to generate sparse code fully automatically + on sparsity properties of tensors. 
The encoding is eventually used by a + `sparse compiler` pass to generate sparse code fully automatically for all tensor expressions that involve tensors with a sparse encoding. Compiler passes that run before this sparse compiler pass need to be aware of the semantics of tensor types with such an encoding. }]; - // All data is stored in a dictionary, interpreted by the methods below. + // Data in sparse tensor encoding. let parameters = ( ins - "DictionaryAttr":$dict - ); - - let extraClassDeclaration = [{ - // Dimension level types that define sparse tensors: - // Dense - dimension is dense, every entry is stored - // Compressed - dimension is sparse, only nonzeros are stored - // Singleton - dimension contains single coordinate, no siblings - enum class DimLevelType { - Dense, Compressed, Singleton - }; - - // Returns the dimension level type in the given dimension `dim` - // of this tensor type. The choices, defined by the `DimLevelType` - // enum, are `dense` (the dimension should be stored in its entirety), + // A dimension level type for each dimension of a tensor type. + // The choices are `dense` (dimension should be stored in its entirety), // `compressed` (only non-zero regions or elements should be stored), // or `singleton` (no sibling elements for parent). - DimLevelType getDimLevelType(unsigned dim) const; - - // Returns the dimension order of this tensor type as an AffineMap. + ArrayRefParameter< + "SparseTensorEncodingAttr::DimLevelType", + "Per-dimension level type" + >: $dimLevelType, + // A dimension order on the indices of this tensor type. // Unlike dense storage, most sparse storage schemes do not provide // fast random access. This affine map specifies the order of // dimensions that should be support by the sparse storage scheme // (e.g. (i,j) -> (i,j) requests 2-d row-wise and (i,j) -> (j,i) // requests 2-d column-wise storage). 
// TODO: block structure with higher-dim inputs - AffineMap getDimOrdering() const; - - // Returns the required bit width for pointer storage. A narrow width - // reduces the memory footprint of overhead storage, as long as the - // width suffices to define the total required range (viz. the maximum + "AffineMap":$dimOrdering, + // The required bit width for pointer storage. A narrow width reduces + // the memory footprint of overhead storage, as long as the width + // suffices to define the total required range (viz. the maximum // number of stored entries over all indirection dimensions). The choices // are `8`, `16`, `32`, `64`, or `0` for a native width. - unsigned getPointerBitWidth() const; - - // Returns the required bit width for index storage. A narrow width - // reduces the memory footprint of overhead storage, as long as the - // width suffices to define the total required range (viz. the maximum + "unsigned":$pointerBitWidth, + // The required bit width for index storage. A narrow width reduces + // the memory footprint of overhead storage, as long as the width + // suffices to define the total required range (viz. the maximum // value of each tensor index over all dimensions). The choices are `8`, // `16`, `32`, `64`, or `0` for a native width. 
- unsigned getIndexBitWidth() const; + "unsigned":$indexBitWidth + ); + + let genVerifyDecl = 1; + + let extraClassDeclaration = [{ + // Dimension level types that define sparse tensors: + // Dense - dimension is dense, every entry is stored + // Compressed - dimension is sparse, only nonzeros are stored + // Singleton - dimension contains single coordinate, no siblings + enum class DimLevelType { + Dense, Compressed, Singleton + }; }]; } -#endif // LLVMIR_ATTRDEFS +#endif // SPARSETENSOR_ATTRDEFS diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td --- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td @@ -9,6 +9,7 @@ #ifndef SPARSETENSOR_OPS #define SPARSETENSOR_OPS +include "mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td" include "mlir/Dialect/SparseTensor/IR/SparseTensorBase.td" include "mlir/Interfaces/SideEffectInterfaces.td" diff --git a/mlir/include/mlir/Dialect/Tensor/IR/CMakeLists.txt b/mlir/include/mlir/Dialect/Tensor/IR/CMakeLists.txt --- a/mlir/include/mlir/Dialect/Tensor/IR/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/Tensor/IR/CMakeLists.txt @@ -1,7 +1,2 @@ add_mlir_dialect(TensorOps tensor) add_mlir_doc(TensorOps TensorOps Dialects/ -gen-dialect-doc) - -set(LLVM_TARGET_DEFINITIONS TensorAttrDefs.td) -mlir_tablegen(TensorAttrDefs.h.inc -gen-attrdef-decls) -mlir_tablegen(TensorAttrDefs.cpp.inc -gen-attrdef-defs) -add_public_tablegen_target(MLIRTensorAttrDefsIncGen) diff --git a/mlir/include/mlir/Dialect/Tensor/IR/Tensor.h b/mlir/include/mlir/Dialect/Tensor/IR/Tensor.h --- a/mlir/include/mlir/Dialect/Tensor/IR/Tensor.h +++ b/mlir/include/mlir/Dialect/Tensor/IR/Tensor.h @@ -13,7 +13,6 @@ #include "mlir/IR/Dialect.h" #include "mlir/IR/OpDefinition.h" #include "mlir/IR/OpImplementation.h" -#include "mlir/IR/TensorEncoding.h" #include "mlir/Interfaces/CastInterfaces.h" #include 
"mlir/Interfaces/ControlFlowInterfaces.h" #include "mlir/Interfaces/SideEffectInterfaces.h" @@ -24,13 +23,6 @@ #include "mlir/Dialect/Tensor/IR/TensorOpsDialect.h.inc" -//===----------------------------------------------------------------------===// -// Tensor Dialect Attributes -//===----------------------------------------------------------------------===// - -#define GET_ATTRDEF_CLASSES -#include "mlir/Dialect/Tensor/IR/TensorAttrDefs.h.inc" - //===----------------------------------------------------------------------===// // Tensor Dialect Operations //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td --- a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td +++ b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td @@ -10,7 +10,6 @@ #define TENSOR_OPS include "mlir/Dialect/Tensor/IR/TensorBase.td" -include "mlir/Dialect/Tensor/IR/TensorAttrDefs.td" include "mlir/Interfaces/CastInterfaces.td" include "mlir/Interfaces/ControlFlowInterfaces.td" include "mlir/Interfaces/SideEffectInterfaces.td" diff --git a/mlir/lib/Dialect/SparseTensor/IR/CMakeLists.txt b/mlir/lib/Dialect/SparseTensor/IR/CMakeLists.txt --- a/mlir/lib/Dialect/SparseTensor/IR/CMakeLists.txt +++ b/mlir/lib/Dialect/SparseTensor/IR/CMakeLists.txt @@ -5,9 +5,11 @@ ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/SparseTensor DEPENDS + MLIRSparseTensorAttrDefsIncGen MLIRSparseTensorOpsIncGen LINK_LIBS PUBLIC MLIRDialect MLIRIR + MLIRSupport ) diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp --- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp +++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp @@ -9,12 +9,184 @@ #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h" #include "mlir/IR/Builders.h" +#include "mlir/IR/DialectImplementation.h" #include "mlir/IR/OpImplementation.h" 
+#include "llvm/ADT/TypeSwitch.h" using namespace mlir; using namespace mlir::sparse_tensor; +//===----------------------------------------------------------------------===// +// TensorDialect Attribute Methods +//===----------------------------------------------------------------------===// + +#define GET_ATTRDEF_CLASSES +#include "mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.cpp.inc" + +static bool acceptBitWidth(unsigned bitWidth) { + switch (bitWidth) { + case 0: + case 8: + case 16: + case 32: + case 64: + return true; + default: + return false; + } +} + +Attribute SparseTensorEncodingAttr::parse(MLIRContext *context, + DialectAsmParser &parser, Type type) { + if (failed(parser.parseLess())) + return {}; + // Parse the data as a dictionary. + DictionaryAttr dict; + if (failed(parser.parseAttribute(dict))) + return {}; + if (failed(parser.parseGreater())) + return {}; + // Process the data from the parsed dictionary value into struct-like data. + SmallVector dlt; + AffineMap map = {}; + unsigned ptr = 0; + unsigned ind = 0; + for (const NamedAttribute &attr : dict) { + if (attr.first == "dimLevelType") { + auto arrayAttr = attr.second.dyn_cast(); + if (!arrayAttr) { + parser.emitError(parser.getNameLoc(), + "expected an array for dimension level types"); + return {}; + } + for (unsigned i = 0, e = arrayAttr.size(); i < e; i++) { + auto strAttr = arrayAttr[i].dyn_cast(); + if (!strAttr) { + parser.emitError(parser.getNameLoc(), + "expected a string value in dimension level types"); + return {}; + } + auto strVal = strAttr.getValue(); + if (strVal == "dense") { + dlt.push_back(SparseTensorEncodingAttr::DimLevelType::Dense); + } else if (strVal == "compressed") { + dlt.push_back(SparseTensorEncodingAttr::DimLevelType::Compressed); + } else if (strVal == "singleton") { + dlt.push_back(SparseTensorEncodingAttr::DimLevelType::Singleton); + } else { + parser.emitError(parser.getNameLoc(), + "unexpected dimension level type: ") + << strVal; + return {}; + } + } + 
} else if (attr.first == "dimOrdering") { + auto affineAttr = attr.second.dyn_cast(); + if (!affineAttr) { + parser.emitError(parser.getNameLoc(), + "expected an affine map for dimension ordering"); + return {}; + } + map = affineAttr.getValue(); + } else if (attr.first == "pointerBitWidth") { + auto intAttr = attr.second.dyn_cast(); + if (!intAttr) { + parser.emitError(parser.getNameLoc(), + "expected an integral pointer bitwidth"); + return {}; + } + ptr = intAttr.getInt(); + } else if (attr.first == "indexBitWidth") { + auto intAttr = attr.second.dyn_cast(); + if (!intAttr) { + parser.emitError(parser.getNameLoc(), + "expected an integral index bitwidth"); + return {}; + } + ind = intAttr.getInt(); + } else { + parser.emitError(parser.getNameLoc(), "unexpected key: ") + << attr.first.str(); + return {}; + } + } + // Construct struct-like storage for attribute. + return parser.getChecked(context, dlt, map, ptr, + ind); +} + +void SparseTensorEncodingAttr::print(DialectAsmPrinter &printer) const { + // Print the struct-like storage in dictionary fashion. 
+ printer << "encoding<{ dimLevelType = [ "; + for (unsigned i = 0, e = getDimLevelType().size(); i < e; i++) { + switch (getDimLevelType()[i]) { + case DimLevelType::Dense: + printer << "\"dense\""; + break; + case DimLevelType::Compressed: + printer << "\"compressed\""; + break; + case DimLevelType::Singleton: + printer << "\"singleton\""; + break; + } + if (i != e - 1) + printer << ", "; + } + printer << " ]"; + if (getDimOrdering()) + printer << ", dimOrdering = affine_map<" << getDimOrdering() << ">"; + printer << ", pointerBitWidth = " << getPointerBitWidth() + << ", indexBitWidth = " << getIndexBitWidth() << " }>"; +} + +LogicalResult SparseTensorEncodingAttr::verify( + function_ref emitError, + ArrayRef dimLevelType, AffineMap dimOrdering, + unsigned pointerBitWidth, unsigned indexBitWidth) { + if (!acceptBitWidth(pointerBitWidth)) + return emitError() << "unexpected pointer bitwidth: " << pointerBitWidth; + if (!acceptBitWidth(indexBitWidth)) + return emitError() << "unexpected index bitwidth: " << indexBitWidth; + if (dimOrdering) { + if (!dimOrdering.isPermutation()) + return emitError() + << "expected a permutation affine map for dimension ordering"; + if (dimOrdering.getNumResults() != dimLevelType.size()) + return emitError() << "unexpected mismatch in ordering and dimension " + "level types size"; + } + return success(); +} + +LogicalResult SparseTensorEncodingAttr::verifyEncoding( + ArrayRef shape, Type elementType, + function_ref emitError) const { + // Check structural integrity. + if (failed(verify(emitError, getDimLevelType(), getDimOrdering(), + getPointerBitWidth(), getIndexBitWidth()))) + return failure(); + // Check integrity with tensor type specifics. Dimension ordering is optional, + // but we always should have dimension level types for the full rank. 
+ unsigned size = shape.size(); + if (getDimOrdering() && getDimOrdering().getNumResults() != size) + return emitError() << "expected an affine map of size " << size + << " for dimension ordering"; + if (getDimLevelType().size() != size) + return emitError() << "expected an array of size " << size + << " for dimension level types"; + return success(); +} + +//===----------------------------------------------------------------------===// +// TensorDialect Methods +//===----------------------------------------------------------------------===// + void SparseTensorDialect::initialize() { + addAttributes< +#define GET_ATTRDEF_LIST +#include "mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.cpp.inc" + >(); addOperations< #define GET_OP_LIST #include "mlir/Dialect/SparseTensor/IR/SparseTensorOps.cpp.inc" @@ -23,3 +195,23 @@ #define GET_OP_CLASSES #include "mlir/Dialect/SparseTensor/IR/SparseTensorOps.cpp.inc" + +Attribute SparseTensorDialect::parseAttribute(DialectAsmParser &parser, + Type type) const { + StringRef attrTag; + if (failed(parser.parseKeyword(&attrTag))) + return Attribute(); + Attribute attr; + auto parseResult = + generatedAttributeParser(getContext(), parser, attrTag, type, attr); + if (parseResult.hasValue()) + return attr; + parser.emitError(parser.getNameLoc(), "unknown sparse tensor attribute"); + return Attribute(); +} + +void SparseTensorDialect::printAttribute(Attribute attr, + DialectAsmPrinter &printer) const { + if (succeeded(generatedAttributePrinter(attr, printer))) + return; +} diff --git a/mlir/lib/Dialect/Tensor/IR/CMakeLists.txt b/mlir/lib/Dialect/Tensor/IR/CMakeLists.txt --- a/mlir/lib/Dialect/Tensor/IR/CMakeLists.txt +++ b/mlir/lib/Dialect/Tensor/IR/CMakeLists.txt @@ -7,7 +7,6 @@ DEPENDS MLIRTensorOpsIncGen - MLIRTensorAttrDefsIncGen LINK_COMPONENTS Core diff --git a/mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp b/mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp --- a/mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp +++ 
b/mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp @@ -7,142 +7,11 @@ //===----------------------------------------------------------------------===// #include "mlir/Dialect/Tensor/IR/Tensor.h" -#include "mlir/IR/DialectImplementation.h" #include "mlir/Transforms/InliningUtils.h" -#include "llvm/ADT/TypeSwitch.h" using namespace mlir; using namespace mlir::tensor; -//===----------------------------------------------------------------------===// -// TableGen'd Attributes Methods -//===----------------------------------------------------------------------===// - -#define GET_ATTRDEF_CLASSES -#include "mlir/Dialect/Tensor/IR/TensorAttrDefs.cpp.inc" - -// Dictionary keys. -static constexpr StringRef getSparseDimLevelTypeAttrName() { - return "sparseDimLevelType"; -} -static constexpr StringRef getSparseDimOrderingAttrName() { - return "sparseDimOrdering"; -} -static constexpr StringRef getSparsePointerBitWidthAttrName() { - return "sparsePointerBitWidth"; -} -static constexpr StringRef getSparseIndexBitWidthAttrName() { - return "sparseIndexBitWidth"; -} - -// Dictionary values. 
-static constexpr StringRef getDenseDimLevelTypeVal() { return "dense"; } -static constexpr StringRef getCompressedDimLevelTypeVal() { - return "compressed"; -} -static constexpr StringRef getSingletonDimLevelTypeVal() { return "singleton"; } - -Attribute SparseTensorEncodingAttr::parse(MLIRContext *context, - DialectAsmParser &parser, Type type) { - if (failed(parser.parseLess())) - return {}; - DictionaryAttr dict; - if (failed(parser.parseAttribute(dict))) - return {}; - if (failed(parser.parseGreater())) - return {}; - return SparseTensorEncodingAttr::get(context, dict); -} - -void SparseTensorEncodingAttr::print(DialectAsmPrinter &printer) const { - printer << "sparse<" << getDict() << ">"; -} - -LogicalResult SparseTensorEncodingAttr::verifyEncoding( - llvm::ArrayRef shape, Type elementType, - llvm::function_ref emitError) const { - unsigned size = shape.size(); - for (const NamedAttribute &attr : getDict()) { - if (attr.first == getSparseDimLevelTypeAttrName()) { - // Dimension level type verification. - auto arrayAttr = attr.second.dyn_cast(); - if (!arrayAttr || size != static_cast(arrayAttr.size())) - return emitError() << "expected an array of size " << size - << " for dimension level types"; - for (unsigned i = 0; i < size; i++) { - auto strAttr = arrayAttr[i].dyn_cast(); - if (!strAttr) - return emitError() - << "expected string value in dimension level types"; - auto strVal = strAttr.getValue(); - if (strVal != getDenseDimLevelTypeVal() && - strVal != getCompressedDimLevelTypeVal() && - strVal != getSingletonDimLevelTypeVal()) - return emitError() << "unexpected dimension level type: " << strAttr; - } - } else if (attr.first == getSparseDimOrderingAttrName()) { - // Dimension order verification. 
- auto affineAttr = attr.second.dyn_cast(); - if (!affineAttr) - return emitError() << "expected an affine map for dimension ordering"; - AffineMap map = affineAttr.getValue(); - if (size != map.getNumResults() || !map.isPermutation()) - return emitError() << "expected a permutation affine map of size " - << size << " for dimension ordering"; - } else if (attr.first == getSparsePointerBitWidthAttrName() || - attr.first == getSparseIndexBitWidthAttrName()) { - // Pointer or index bitwidth verification. - auto intAttr = attr.second.dyn_cast(); - if (!intAttr) - return emitError() << "expected an integral bitwidth"; - switch (intAttr.getInt()) { - case 0: - case 8: - case 16: - case 32: - case 64: - continue; - default: - return emitError() << "unexpected bitwidth: " << intAttr.getInt(); - } - } else { - return emitError() << "unexpected key: " << attr.first.str(); - } - } - return success(); -} - -SparseTensorEncodingAttr::DimLevelType -SparseTensorEncodingAttr::getDimLevelType(unsigned dim) const { - if (auto value = getDict().get(getSparseDimLevelTypeAttrName())) { - auto strVal = - value.dyn_cast()[dim].cast().getValue(); - if (strVal == getCompressedDimLevelTypeVal()) - return DimLevelType::Compressed; - if (strVal == getSingletonDimLevelTypeVal()) - return DimLevelType::Singleton; - } - return DimLevelType::Dense; -} - -AffineMap SparseTensorEncodingAttr::getDimOrdering() const { - if (auto value = getDict().get(getSparseDimOrderingAttrName())) - return value.cast().getValue(); - return {}; -} - -unsigned SparseTensorEncodingAttr::getPointerBitWidth() const { - if (auto value = getDict().get(getSparsePointerBitWidthAttrName())) - return value.cast().getInt(); - return 0; -} - -unsigned SparseTensorEncodingAttr::getIndexBitWidth() const { - if (auto value = getDict().get(getSparseIndexBitWidthAttrName())) - return value.cast().getInt(); - return 0; -} - //===----------------------------------------------------------------------===// // TensorDialect Dialect 
Interfaces //===----------------------------------------------------------------------===// @@ -166,33 +35,9 @@ //===----------------------------------------------------------------------===// void TensorDialect::initialize() { - addAttributes< -#define GET_ATTRDEF_LIST -#include "mlir/Dialect/Tensor/IR/TensorAttrDefs.cpp.inc" - >(); addOperations< #define GET_OP_LIST #include "mlir/Dialect/Tensor/IR/TensorOps.cpp.inc" >(); addInterfaces(); } - -Attribute TensorDialect::parseAttribute(DialectAsmParser &parser, - Type type) const { - StringRef attrTag; - if (failed(parser.parseKeyword(&attrTag))) - return Attribute(); - Attribute attr; - auto parseResult = - generatedAttributeParser(getContext(), parser, attrTag, type, attr); - if (parseResult.hasValue()) - return attr; - parser.emitError(parser.getNameLoc(), "unknown tensor attribute"); - return Attribute(); -} - -void TensorDialect::printAttribute(::mlir::Attribute attr, - ::mlir::DialectAsmPrinter &printer) const { - if (succeeded(generatedAttributePrinter(attr, printer))) - return; -} diff --git a/mlir/lib/IR/Dominance.cpp b/mlir/lib/IR/Dominance.cpp --- a/mlir/lib/IR/Dominance.cpp +++ b/mlir/lib/IR/Dominance.cpp @@ -27,9 +27,10 @@ /// Return true if the region with the given index inside the operation /// has SSA dominance. 
static bool hasSSADominance(Operation *op, unsigned index) { + if (!op->isRegistered()) return false; + auto kindInterface = dyn_cast(op); - return op->isRegistered() && - (!kindInterface || kindInterface.hasSSADominance(index)); + return !kindInterface || kindInterface.hasSSADominance(index); } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/IR/Verifier.cpp b/mlir/lib/IR/Verifier.cpp --- a/mlir/lib/IR/Verifier.cpp +++ b/mlir/lib/IR/Verifier.cpp @@ -137,10 +137,11 @@ return emitError(block, "empty block: expect at least a terminator"); } - // Verify the non-terminator operations separately so that we can verify - // they have no successors. - for (auto &op : llvm::make_range(block.begin(), std::prev(block.end()))) { - if (op.getNumSuccessors() != 0) + // Check each operation, and make sure there are no branches out of the + // middle of this block. + for (auto &op : llvm::make_range(block.begin(), block.end())) { + // Only the last instructions is allowed to have successors. + if (op.getNumSuccessors() != 0 && &op != &block.back()) return op.emitError( "operation with block successors must terminate its parent block"); @@ -148,11 +149,6 @@ return failure(); } - // Verify the terminator. - Operation &terminator = block.back(); - if (failed(verifyOperation(terminator))) - return failure(); - // Verify that this block is not branching to a block of a different // region. 
for (Block *successor : block.getSuccessors()) @@ -164,6 +160,7 @@ if (mayBeValidWithoutTerminator(&block)) return success(); + Operation &terminator = block.back(); if (!terminator.mightHaveTrait()) return block.back().emitError("block with no terminator, has ") << terminator; @@ -245,6 +242,10 @@ return success(); } +//===----------------------------------------------------------------------===// +// Dominance Checking +//===----------------------------------------------------------------------===// + /// Emit an error when the specified operand of the specified operation is an /// invalid use because of dominance properties. static void diagnoseInvalidOperandDominance(Operation &op, unsigned operandNo) { diff --git a/mlir/test/Dialect/SparseTensor/invalid_encoding.mlir b/mlir/test/Dialect/SparseTensor/invalid_encoding.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Dialect/SparseTensor/invalid_encoding.mlir @@ -0,0 +1,56 @@ +// RUN: mlir-opt <%s -split-input-file -verify-diagnostics + +// ----- + +#a = #sparse_tensor.encoding<{dimLevelType = ["dense", "compressed"]}> +func private @tensor_size_mismatch(%arg0: tensor<8xi32, #a>) -> () // expected-error {{expected an array of size 1 for dimension level types}} + +// ----- + +#a = #sparse_tensor.encoding<{dimLevelType = ["dense", "compressed"], dimOrdering = affine_map<(i) -> (i)>}> // expected-error {{unexpected mismatch in ordering and dimension level types size}} +func private @tensor_sizes_mismatch(%arg0: tensor<8xi32, #a>) -> () + +// ----- + +#a = #sparse_tensor.encoding<{dimLevelType = [1]}> // expected-error {{expected a string value in dimension level types}} +func private @tensor_type_mismatch(%arg0: tensor<8xi32, #a>) -> () + +// ----- + +#a = #sparse_tensor.encoding<{dimLevelType = ["strange"]}> // expected-error {{unexpected dimension level type: strange}} +func private @tensor_value_mismatch(%arg0: tensor<8xi32, #a>) -> () + +// ----- + +#a = #sparse_tensor.encoding<{dimOrdering = "wrong"}> // 
expected-error {{expected an affine map for dimension ordering}} +func private @tensor_order_mismatch(%arg0: tensor<8xi32, #a>) -> () + +// ----- + +#a = #sparse_tensor.encoding<{dimOrdering = affine_map<(i,j) -> (i,i)>}> // expected-error {{expected a permutation affine map for dimension ordering}} +func private @tensor_no_permutation(%arg0: tensor<16x32xf32, #a>) -> () + +// ----- + +#a = #sparse_tensor.encoding<{pointerBitWidth = "x"}> // expected-error {{expected an integral pointer bitwidth}} +func private @tensor_no_int_ptr(%arg0: tensor<16x32xf32, #a>) -> () + +// ----- + +#a = #sparse_tensor.encoding<{pointerBitWidth = 42}> // expected-error {{unexpected pointer bitwidth: 42}} +func private @tensor_invalid_int_ptr(%arg0: tensor<16x32xf32, #a>) -> () + +// ----- + +#a = #sparse_tensor.encoding<{indexBitWidth = "not really"}> // expected-error {{expected an integral index bitwidth}} +func private @tensor_no_int_index(%arg0: tensor<16x32xf32, #a>) -> () + +// ----- + +#a = #sparse_tensor.encoding<{indexBitWidth = 128}> // expected-error {{unexpected index bitwidth: 128}} +func private @tensor_invalid_int_index(%arg0: tensor<16x32xf32, #a>) -> () + +// ----- + +#a = #sparse_tensor.encoding<{key = 1}> // expected-error {{unexpected key: key}} +func private @tensor_invalid_key(%arg0: tensor<16x32xf32, #a>) -> () diff --git a/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir b/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir @@ -0,0 +1,16 @@ +// RUN: mlir-opt <%s | mlir-opt | FileCheck %s + +// CHECK-LABEL: func private @sparse_1d_tensor( +// CHECK-SAME: tensor<32xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>) +func private @sparse_1d_tensor(tensor<32xf64, #sparse_tensor.encoding<{dimLevelType = ["compressed"]}>>) + +#CSR = #sparse_tensor.encoding<{ + dimLevelType = [ "dense", "compressed" ], + 
dimOrdering = affine_map<(i,j) -> (i,j)>, + pointerBitWidth = 64, + indexBitWidth = 64 +}> + +// CHECK-LABEL: func private @sparse_2d_tensor( +// CHECK-SAME: tensor (d0, d1)>, pointerBitWidth = 64, indexBitWidth = 64 }>>) +func private @sparse_2d_tensor(tensor) diff --git a/mlir/test/Dialect/Tensor/invalid_sparse_tensor.mlir b/mlir/test/Dialect/Tensor/invalid_sparse_tensor.mlir deleted file mode 100644 --- a/mlir/test/Dialect/Tensor/invalid_sparse_tensor.mlir +++ /dev/null @@ -1,46 +0,0 @@ -// RUN: mlir-opt <%s -split-input-file -verify-diagnostics - -// ----- - -#a = #tensor.sparse<{sparseDimLevelType = [1,2]}> -func private @tensor_size_mismatch(%arg0: tensor<8xi32, #a>) -> () // expected-error {{expected an array of size 1 for dimension level types}} - -// ----- - -#a = #tensor.sparse<{sparseDimLevelType = [1]}> -func private @tensor_type_mismatch(%arg0: tensor<8xi32, #a>) -> () // expected-error {{expected string value in dimension level types}} - -// ----- - -#a = #tensor.sparse<{sparseDimLevelType = ["strange"]}> -func private @tensor_value_mismatch(%arg0: tensor<8xi32, #a>) -> () // expected-error {{unexpected dimension level type: "strange"}} - -// ----- - -#a = #tensor.sparse<{sparseDimOrdering = "wrong"}> -func private @tensor_order_mismatch(%arg0: tensor<8xi32, #a>) -> () // expected-error {{expected an affine map for dimension ordering}} - -// ----- - -#a = #tensor.sparse<{sparseDimOrdering = affine_map<(i,j) -> (i,i)>}> -func private @tensor_no_permutation(%arg0: tensor<16x32xf32, #a>) -> () // expected-error {{expected a permutation affine map of size 2 for dimension ordering}} - -// ----- - -#a = #tensor.sparse<{sparsePointerBitWidth = 42}> -func private @tensor_invalid_int_ptr(%arg0: tensor<16x32xf32, #a>) -> () // expected-error {{unexpected bitwidth: 42}} - -// ----- - -#a = #tensor.sparse<{sparseIndexBitWidth = "not really"}> -func private @tensor_no_int_index(%arg0: tensor<16x32xf32, #a>) -> () // expected-error {{expected an integral bitwidth}} 
- -// ----- - -#a = #tensor.sparse<{sparseIndexBitWidth = 128}> -func private @tensor_invalid_int_index(%arg0: tensor<16x32xf32, #a>) -> () // expected-error {{unexpected bitwidth: 128}} - -// ----- - -#a = #tensor.sparse<{key = 1}> -func private @tensor_invalid_key(%arg0: tensor<16x32xf32, #a>) -> () // expected-error {{unexpected key: key}} diff --git a/mlir/test/Dialect/Tensor/valid_sparse.mlir b/mlir/test/Dialect/Tensor/valid_sparse.mlir deleted file mode 100644 --- a/mlir/test/Dialect/Tensor/valid_sparse.mlir +++ /dev/null @@ -1,14 +0,0 @@ -// RUN: mlir-opt <%s | mlir-opt | FileCheck %s - -// CHECK: func private @sparse_1d_tensor(tensor<32xf64, #tensor.sparse<{sparseDimLevelType = ["compressed"]}>>) -func private @sparse_1d_tensor(tensor<32xf64, #tensor.sparse<{sparseDimLevelType = ["compressed"]}>>) - -#CSR = #tensor.sparse<{ - sparseDimLevelType = [ "dense", "compressed" ], - sparseDimOrdering = affine_map<(i,j) -> (i,j)>, - sparseIndexBitWidth = 64, - sparsePointerBitWidth = 64 -}> - -// CHECK: func private @sparse_2d_tensor(tensor (d0, d1)>, sparseIndexBitWidth = 64 : i64, sparsePointerBitWidth = 64 : i64}>>) -func private @sparse_2d_tensor(tensor)