diff --git a/clang/include/clang/AST/ParentMapContext.h b/clang/include/clang/AST/ParentMapContext.h --- a/clang/include/clang/AST/ParentMapContext.h +++ b/clang/include/clang/AST/ParentMapContext.h @@ -69,7 +69,7 @@ ASTContext &ASTCtx; class ParentMap; ast_type_traits::TraversalKind Traversal = ast_type_traits::TK_AsIs; - std::map> Parents; + std::unique_ptr Parents; }; class TraversalKindScope { diff --git a/clang/lib/AST/ParentMapContext.cpp b/clang/lib/AST/ParentMapContext.cpp --- a/clang/lib/AST/ParentMapContext.cpp +++ b/clang/lib/AST/ParentMapContext.cpp @@ -23,7 +23,7 @@ ParentMapContext::~ParentMapContext() = default; -void ParentMapContext::clear() { Parents.clear(); } +void ParentMapContext::clear() { Parents.reset(); } const Expr *ParentMapContext::traverseIgnored(const Expr *E) const { return traverseIgnored(const_cast(E)); @@ -116,11 +116,79 @@ } } - DynTypedNodeList getParents(const ast_type_traits::DynTypedNode &Node) { - if (Node.getNodeKind().hasPointerIdentity()) - return getDynNodeFromMap(Node.getMemoizationData(), PointerParents); + DynTypedNodeList getParents(ast_type_traits::TraversalKind TK, + const ast_type_traits::DynTypedNode &Node) { + if (Node.getNodeKind().hasPointerIdentity()) { + auto ParentList = + getDynNodeFromMap(Node.getMemoizationData(), PointerParents); + if (ParentList.size() == 1 && + TK == ast_type_traits::TK_IgnoreUnlessSpelledInSource) { + const auto *E = ParentList[0].get(); + const auto *Child = Node.get(); + if (E && Child) + return AscendIgnoreUnlessSpelledInSource(E, Child); + } + return ParentList; + } return getDynNodeFromMap(Node, OtherParents); } + + ast_type_traits::DynTypedNode + AscendIgnoreUnlessSpelledInSource(const Expr *E, const Expr *Child) { + + auto ShouldSkip = [](const Expr *E, const Expr *Child) { + if (isa(E)) + return true; + + if (isa(E)) + return true; + + if (isa(E)) + return true; + + if (isa(E)) + return true; + + if (isa(E)) + return true; + + if (isa(E)) + return true; + + auto SR = 
Child->getSourceRange(); + + if (const auto *C = dyn_cast(E)) { + if (C->getSourceRange() == SR || !isa(C)) + return true; + } + + if (const auto *C = dyn_cast(E)) { + if (C->getSourceRange() == SR) + return true; + } + + if (const auto *C = dyn_cast(E)) { + if (C->getSourceRange() == SR) + return true; + } + return false; + }; + + while (ShouldSkip(E, Child)) { + auto It = PointerParents.find(E); + if (It == PointerParents.end()) + break; + const auto *S = It->second.dyn_cast(); + if (!S) + return getSingleDynTypedNodeFromParentMap(It->second); + const auto *P = dyn_cast(S); + if (!P) + return ast_type_traits::DynTypedNode::create(*S); + Child = E; + E = P; + } + return ast_type_traits::DynTypedNode::create(*E); + } }; /// Template specializations to abstract away from pointers and TypeLocs. @@ -151,8 +219,7 @@ class ParentMapContext::ParentMap::ASTVisitor : public RecursiveASTVisitor { public: - ASTVisitor(ParentMap &Map, ParentMapContext &MapCtx) - : Map(Map), MapCtx(MapCtx) {} + ASTVisitor(ParentMap &Map) : Map(Map) {} private: friend class RecursiveASTVisitor; @@ -222,11 +289,8 @@ } bool TraverseStmt(Stmt *StmtNode) { - Stmt *FilteredNode = StmtNode; - if (auto *ExprNode = dyn_cast_or_null(FilteredNode)) - FilteredNode = MapCtx.traverseIgnored(ExprNode); - return TraverseNode(FilteredNode, FilteredNode, - [&] { return VisitorBase::TraverseStmt(FilteredNode); }, + return TraverseNode(StmtNode, StmtNode, + [&] { return VisitorBase::TraverseStmt(StmtNode); }, &Map.PointerParents); } @@ -245,21 +309,18 @@ } ParentMap ⤅ - ParentMapContext &MapCtx; llvm::SmallVector ParentStack; }; ParentMapContext::ParentMap::ParentMap(ASTContext &Ctx) { - ASTVisitor(*this, Ctx.getParentMapContext()).TraverseAST(Ctx); + ASTVisitor(*this).TraverseAST(Ctx); } DynTypedNodeList ParentMapContext::getParents(const ast_type_traits::DynTypedNode &Node) { - std::unique_ptr &P = Parents[Traversal]; - if (!P) + if (!Parents) // We build the parent map for the traversal scope (usually whole 
TU), as // hasAncestor can escape any subtree. - P = std::make_unique(ASTCtx); - return P->getParents(Node); + Parents = std::make_unique(ASTCtx); + return Parents->getParents(getTraversalKind(), Node); } - diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp --- a/clang/lib/Parse/ParseExprCXX.cpp +++ b/clang/lib/Parse/ParseExprCXX.cpp @@ -3383,25 +3383,6 @@ Diag(Tok, diag::err_requires_expr_missing_arrow) << FixItHint::CreateInsertion(Tok.getLocation(), "->"); // Try to parse a 'type-constraint' - CXXScopeSpec SS; - if (ParseOptionalCXXScopeSpecifier(SS, ParsedType(), - /*EnteringContext=*/false, - /*MayBePseudoDestructor=*/nullptr, - // If this is not a type-constraint, - // then this scope-spec is part of - // the typename of a non-type - // template parameter - /*IsTypename=*/true, - /*LastII=*/nullptr, - // We won't find concepts in - // non-namespaces anyway, so might as - // well parse this correctly for - // possible type names. - /*OnlyNamespace=*/false, - /*SuppressDiagnostic=*/true)) { - SkipUntil(tok::semi, tok::r_brace, SkipUntilFlags::StopBeforeMatch); - break; - } if (TryAnnotateTypeConstraint()) { SkipUntil(tok::semi, tok::r_brace, SkipUntilFlags::StopBeforeMatch); break; @@ -3411,8 +3392,13 @@ SkipUntil(tok::semi, tok::r_brace, SkipUntilFlags::StopBeforeMatch); break; } - if (Tok.is(tok::annot_cxxscope)) + CXXScopeSpec SS; + if (Tok.is(tok::annot_cxxscope)) { + Actions.RestoreNestedNameSpecifierAnnotation(Tok.getAnnotationValue(), + Tok.getAnnotationRange(), + SS); ConsumeAnnotationToken(); + } Req = Actions.ActOnCompoundRequirement( Expression.get(), NoexceptLoc, SS, takeTemplateIdAnnotation(Tok), diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -2047,12 +2047,14 @@ if (const auto *TC = TTP->getTypeConstraint()) { TemplateArgumentListInfo TransformedArgs; const auto *ArgsAsWritten = 
TC->getTemplateArgsAsWritten(); - if (SemaRef.Subst(ArgsAsWritten->getTemplateArgs(), + if (!ArgsAsWritten || + SemaRef.Subst(ArgsAsWritten->getTemplateArgs(), ArgsAsWritten->NumTemplateArgs, TransformedArgs, Args)) SemaRef.AttachTypeConstraint( TC->getNestedNameSpecifierLoc(), TC->getConceptNameInfo(), - TC->getNamedConcept(), &TransformedArgs, NewTTP, + TC->getNamedConcept(), ArgsAsWritten ? &TransformedArgs : nullptr, + NewTTP, NewTTP->isParameterPack() ? cast(TC->getImmediatelyDeclaredConstraint()) ->getEllipsisLoc() diff --git a/clang/test/Parser/cxx2a-abbreviated-templates.cpp b/clang/test/Parser/cxx2a-abbreviated-templates.cpp new file mode 100644 --- /dev/null +++ b/clang/test/Parser/cxx2a-abbreviated-templates.cpp @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -std=c++2a -x c++ %s -verify +// expected-no-diagnostics + +template +concept C = true; + +namespace ns { + template + concept D = true; +} + +void foo(C auto a, + C auto b, + ns::D auto c, + ns::D auto d, + const C auto e, + const C auto f, + const ns::D auto g, + const ns::D auto h); \ No newline at end of file diff --git a/clang/test/Parser/cxx2a-concepts-requires-expr.cpp b/clang/test/Parser/cxx2a-concepts-requires-expr.cpp --- a/clang/test/Parser/cxx2a-concepts-requires-expr.cpp +++ b/clang/test/Parser/cxx2a-concepts-requires-expr.cpp @@ -108,34 +108,38 @@ bool r30 = requires { { 0 } noexcept -> C2; }; +namespace ns { template concept C = true; } + +bool r31 = requires { { 0 } noexcept -> ns::C; }; + template T i1 = 0; -bool r31 = requires { requires false, 1; }; +bool r32 = requires { requires false, 1; }; // expected-error@-1 {{expected ';' at end of requirement}} -bool r32 = requires { 0 noexcept; }; +bool r33 = requires { 0 noexcept; }; // expected-error@-1 {{'noexcept' can only be used in a compound requirement (with '{' '}' around the expression)}} -bool r33 = requires { 0 int; }; +bool r34 = requires { 0 int; }; // expected-error@-1 {{expected ';' at end of requirement}} -bool r34 = requires { 
requires true }; +bool r35 = requires { requires true }; // expected-error@-1 {{expected ';' at end of requirement}} -bool r35 = requires (bool b) { requires sizeof(b) == 1; }; +bool r36 = requires (bool b) { requires sizeof(b) == 1; }; -void r36(bool b) requires requires { 1 } {} +void r37(bool b) requires requires { 1 } {} // expected-error@-1 {{expected ';' at end of requirement}} -bool r37 = requires { requires { 1; }; }; +bool r38 = requires { requires { 1; }; }; // expected-warning@-1 {{this requires expression will only be checked for syntactic validity; did you intend to place it in a nested requirement? (add another 'requires' before the expression)}} -bool r38 = requires { requires () { 1; }; }; +bool r39 = requires { requires () { 1; }; }; // expected-warning@-1 {{this requires expression will only be checked for syntactic validity; did you intend to place it in a nested requirement? (add another 'requires' before the expression)}} -bool r39 = requires { requires (int i) { i; }; }; +bool r40 = requires { requires (int i) { i; }; }; // expected-warning@-1 {{this requires expression will only be checked for syntactic validity; did you intend to place it in a nested requirement? 
(add another 'requires' before the expression)}} -bool r40 = requires { requires (); }; +bool r41 = requires { requires (); }; // expected-error@-1 {{expected expression}} diff --git a/clang/test/Parser/cxx2a-placeholder-type-constraint.cpp b/clang/test/Parser/cxx2a-placeholder-type-constraint.cpp --- a/clang/test/Parser/cxx2a-placeholder-type-constraint.cpp +++ b/clang/test/Parser/cxx2a-placeholder-type-constraint.cpp @@ -3,24 +3,33 @@ template concept C = true; +namespace ns { + template + concept D = true; +} + int foo() { - C auto a4 = 1; - C<> auto a5 = 1; - C auto a6 = 1; - const C auto &a7 = 1; - const C<> auto &a8 = 1; - const C auto &a9 = 1; - C decltype(auto) a10 = 1; - C<> decltype(auto) a11 = 1; - C decltype(auto) a12 = 1; - const C<> decltype(auto) &a13 = 1; // expected-error{{'decltype(auto)' cannot be combined with other type specifiers}} + {ns::D auto a = 1;} + {C auto a = 1;} + {C<> auto a = 1;} + {C auto a = 1;} + {ns::D auto a = 1;} + {const ns::D auto &a = 1;} + {const C auto &a = 1;} + {const C<> auto &a = 1;} + {const C auto &a = 1;} + {const ns::D auto &a = 1;} + {C decltype(auto) a = 1;} + {C<> decltype(auto) a = 1;} + {C decltype(auto) a = 1;} + {const C<> decltype(auto) &a = 1;} // expected-error{{'decltype(auto)' cannot be combined with other type specifiers}} // expected-error@-1{{non-const lvalue reference to type 'int' cannot bind to a temporary of type 'int'}} - const C decltype(auto) &a14 = 1; // expected-error{{'decltype(auto)' cannot be combined with other type specifiers}} + {const C decltype(auto) &a = 1;} // expected-error{{'decltype(auto)' cannot be combined with other type specifiers}} // expected-error@-1{{non-const lvalue reference to type 'int' cannot bind to a temporary of type 'int'}} - C a15 = 1; + {C a = 1;} // expected-error@-1{{expected 'auto' or 'decltype(auto)' after concept name}} - C decltype a19 = 1; + {C decltype a19 = 1;} // expected-error@-1{{expected '('}} - C decltype(1) a20 = 1; + {C decltype(1) a20 = 1;} 
// expected-error@-1{{expected 'auto' or 'decltype(auto)' after concept name}} } \ No newline at end of file diff --git a/libunwind/CMakeLists.txt b/libunwind/CMakeLists.txt --- a/libunwind/CMakeLists.txt +++ b/libunwind/CMakeLists.txt @@ -16,7 +16,7 @@ ) if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBUNWIND_STANDALONE_BUILD) - project(libunwind) + project(libunwind LANGUAGES C CXX ASM) # Rely on llvm-config. set(CONFIG_OUTPUT) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -15033,8 +15033,7 @@ Arguments: """""""""" -The first operand is a vector of pointers which holds all memory addresses to read. The second operand is an alignment of the source addresses. It must be a constant integer value. The third operand, mask, is a vector of boolean values with the same number of elements as the return type. The fourth is a pass-through value that is used to fill the masked-off lanes of the result. The return type, underlying type of the vector of pointers and the type of the '``passthru``' operand are the same vector types. - +The first operand is a vector of pointers which holds all memory addresses to read. The second operand is an alignment of the source addresses. It must be 0 or a power of two constant integer value. The third operand, mask, is a vector of boolean values with the same number of elements as the return type. The fourth is a pass-through value that is used to fill the masked-off lanes of the result. The return type, underlying type of the vector of pointers and the type of the '``passthru``' operand are the same vector types. Semantics: """""""""" @@ -15086,8 +15085,7 @@ Arguments: """""""""" -The first operand is a vector value to be written to memory. The second operand is a vector of pointers, pointing to where the value elements should be stored. It has the same underlying type as the value operand. The third operand is an alignment of the destination addresses. 
The fourth operand, mask, is a vector of boolean values. The types of the mask and the value operand must have the same number of vector elements. - +The first operand is a vector value to be written to memory. The second operand is a vector of pointers, pointing to where the value elements should be stored. It has the same underlying type as the value operand. The third operand is an alignment of the destination addresses. It must be 0 or a power of two constant integer value. The fourth operand, mask, is a vector of boolean values. The types of the mask and the value operand must have the same number of vector elements. Semantics: """""""""" diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -4651,6 +4651,21 @@ break; } + case Intrinsic::masked_gather: { + const APInt &Alignment = + cast(Call.getArgOperand(1))->getValue(); + Assert(Alignment.isNullValue() || Alignment.isPowerOf2(), + "masked_gather: alignment must be 0 or a power of 2", Call); + break; + } + case Intrinsic::masked_scatter: { + const APInt &Alignment = + cast(Call.getArgOperand(2))->getValue(); + Assert(Alignment.isNullValue() || Alignment.isPowerOf2(), + "masked_scatter: alignment must be 0 or a power of 2", Call); + break; + } + case Intrinsic::experimental_guard: { Assert(isa(Call), "experimental_guard cannot be invoked", Call); Assert(Call.countOperandBundlesOfType(LLVMContext::OB_deopt) == 1, diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -34555,8 +34555,8 @@ // permilps(shufps(load(),x)) --> permilps(shufps(x,load())) static SDValue combineCommutableSHUFP(SDValue N, MVT VT, const SDLoc &DL, SelectionDAG &DAG) { - // TODO: Add general vXf32 + vXf64 support. - if (VT != MVT::v4f32) + // TODO: Add vXf64 support. 
+ if (VT != MVT::v4f32 && VT != MVT::v8f32 && VT != MVT::v16f32) return SDValue(); // SHUFP(LHS, RHS) -> SHUFP(RHS, LHS) iff LHS is foldable + RHS is not. diff --git a/llvm/lib/Target/X86/X86ScheduleBdVer2.td b/llvm/lib/Target/X86/X86ScheduleBdVer2.td --- a/llvm/lib/Target/X86/X86ScheduleBdVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBdVer2.td @@ -545,8 +545,40 @@ def : SchedAlias; // This is for simple LEAs with one or two input operands. -// FIXME: SAGU 3-operand LEA -def : WriteRes { let NumMicroOps = 2; } +def : WriteRes { let ResourceCycles = [2]; } + +// This write is used for slow LEA instructions. +def PdWrite3OpsLEA : SchedWriteRes<[PdEX01]> { + let Latency = 2; + let ResourceCycles = [2]; +} + +// On Piledriver, a slow LEA is either a 3Ops LEA (base, index, offset), +// or an LEA with a `Scale` value different than 1. +def PdSlowLEAPredicate : MCSchedPredicate< + CheckAny<[ + // A 3-operand LEA (base, index, offset). + IsThreeOperandsLEAFn, + // An LEA with a "Scale" different than 1. + CheckAll<[ + CheckIsImmOperand<2>, + CheckNot> + ]> + ]> +>; + +def PdWriteLEA : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; + +def : InstRW<[PdWriteLEA], (instrs LEA32r, LEA64r, LEA64_32r)>; + +def PdWriteLEA16r : SchedWriteRes<[PdEX01]> { + let ResourceCycles = [3]; + let NumMicroOps = 2; +} +def : InstRW<[PdWriteLEA16r], (instrs LEA16r)>; // Bit counts. 
defm : PdWriteResExPair; diff --git a/llvm/test/Assembler/auto_upgrade_intrinsics.ll b/llvm/test/Assembler/auto_upgrade_intrinsics.ll --- a/llvm/test/Assembler/auto_upgrade_intrinsics.ll +++ b/llvm/test/Assembler/auto_upgrade_intrinsics.ll @@ -98,7 +98,7 @@ define void @tests.masked.scatter(<2 x double*> %ptr, <2 x i1> %mask, <2 x double> %val) { ; CHECK-LABEL: @tests.masked.scatter( ; CHECK: @llvm.masked.scatter.v2f64.v2p0f64 - call void @llvm.masked.scatter.v2f64(<2 x double> %val, <2 x double*> %ptr, i32 3, <2 x i1> %mask) + call void @llvm.masked.scatter.v2f64(<2 x double> %val, <2 x double*> %ptr, i32 1, <2 x i1> %mask) ret void } diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll @@ -1308,16 +1308,14 @@ define <8 x float> @shuffle_mem_v8f32_8BA0CFE4(<8 x float> %a0, <8 x float>* %a1) { ; AVX1OR2-LABEL: shuffle_mem_v8f32_8BA0CFE4: ; AVX1OR2: # %bb.0: -; AVX1OR2-NEXT: vmovaps (%rdi), %ymm1 -; AVX1OR2-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[2,0],ymm1[4,4],ymm0[6,4] -; AVX1OR2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],ymm1[2,0],ymm0[4,7],ymm1[6,4] +; AVX1OR2-NEXT: vshufps {{.*#+}} ymm1 = ymm0[2,0],mem[0,0],ymm0[6,4],mem[4,4] +; AVX1OR2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],ymm1[0,2],ymm0[4,7],ymm1[4,6] ; AVX1OR2-NEXT: retq ; ; AVX512VL-SLOW-LABEL: shuffle_mem_v8f32_8BA0CFE4: ; AVX512VL-SLOW: # %bb.0: -; AVX512VL-SLOW-NEXT: vmovaps (%rdi), %ymm1 -; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[2,0],ymm1[4,4],ymm0[6,4] -; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],ymm1[2,0],ymm0[4,7],ymm1[6,4] +; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm1 = ymm0[2,0],mem[0,0],ymm0[6,4],mem[4,4] +; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],ymm1[0,2],ymm0[4,7],ymm1[4,6] ; AVX512VL-SLOW-NEXT: retq ; ; AVX512VL-FAST-LABEL: shuffle_mem_v8f32_8BA0CFE4: diff --git 
a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll --- a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll @@ -274,9 +274,8 @@ define <16 x float> @shuffle_v16f32_load_08_11_10_00_12_15_14_04(<16 x float> %a0, <16 x float>* %a1) { ; ALL-LABEL: shuffle_v16f32_load_08_11_10_00_12_15_14_04: ; ALL: # %bb.0: -; ALL-NEXT: vmovaps (%rdi), %zmm1 -; ALL-NEXT: vshufps {{.*#+}} zmm1 = zmm1[0,0],zmm0[2,0],zmm1[4,4],zmm0[6,4],zmm1[8,8],zmm0[10,8],zmm1[12,12],zmm0[14,12] -; ALL-NEXT: vshufps {{.*#+}} zmm0 = zmm0[0,3],zmm1[2,0],zmm0[4,7],zmm1[6,4],zmm0[8,11],zmm1[10,8],zmm0[12,15],zmm1[14,12] +; ALL-NEXT: vshufps {{.*#+}} zmm1 = zmm0[2,0],mem[0,0],zmm0[6,4],mem[4,4],zmm0[10,8],mem[8,8],zmm0[14,12],mem[12,12] +; ALL-NEXT: vshufps {{.*#+}} zmm0 = zmm0[0,3],zmm1[0,2],zmm0[4,7],zmm1[4,6],zmm0[8,11],zmm1[8,10],zmm0[12,15],zmm1[12,14] ; ALL-NEXT: retq %1 = load <16 x float>, <16 x float>* %a1 %2 = shufflevector <16 x float> %1, <16 x float> %a0, <16 x i32> diff --git a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll --- a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll @@ -258,7 +258,7 @@ ; CHECK-LABEL: @scatter_zeromask( ; CHECK-NEXT: ret void ; - call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %val, <2 x double*> %ptrs, i32 6, <2 x i1> zeroinitializer) + call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %val, <2 x double*> %ptrs, i32 8, <2 x i1> zeroinitializer) ret void } diff --git a/llvm/test/Transforms/InstCombine/shift-add.ll b/llvm/test/Transforms/InstCombine/shift-add.ll --- a/llvm/test/Transforms/InstCombine/shift-add.ll +++ b/llvm/test/Transforms/InstCombine/shift-add.ll @@ -5,7 +5,7 @@ define i32 @shl_C1_add_A_C2_i32(i16 %A) { ; CHECK-LABEL: @shl_C1_add_A_C2_i32( -; CHECK-NEXT: [[B:%.*]] = zext i16 %A to i32 +; CHECK-NEXT: 
[[B:%.*]] = zext i16 [[A:%.*]] to i32 ; CHECK-NEXT: [[D:%.*]] = shl i32 192, [[B]] ; CHECK-NEXT: ret i32 [[D]] ; @@ -27,7 +27,7 @@ define i32 @lshr_C1_add_A_C2_i32(i32 %A) { ; CHECK-LABEL: @lshr_C1_add_A_C2_i32( -; CHECK-NEXT: [[B:%.*]] = and i32 %A, 65535 +; CHECK-NEXT: [[B:%.*]] = and i32 [[A:%.*]], 65535 ; CHECK-NEXT: [[D:%.*]] = shl i32 192, [[B]] ; CHECK-NEXT: ret i32 [[D]] ; @@ -39,7 +39,7 @@ define <4 x i32> @shl_C1_add_A_C2_v4i32(<4 x i16> %A) { ; CHECK-LABEL: @shl_C1_add_A_C2_v4i32( -; CHECK-NEXT: [[B:%.*]] = zext <4 x i16> %A to <4 x i32> +; CHECK-NEXT: [[B:%.*]] = zext <4 x i16> [[A:%.*]] to <4 x i32> ; CHECK-NEXT: [[D:%.*]] = shl <4 x i32> , [[B]] ; CHECK-NEXT: ret <4 x i32> [[D]] ; @@ -51,7 +51,7 @@ define <4 x i32> @ashr_C1_add_A_C2_v4i32(<4 x i32> %A) { ; CHECK-LABEL: @ashr_C1_add_A_C2_v4i32( -; CHECK-NEXT: [[B:%.*]] = and <4 x i32> %A, +; CHECK-NEXT: [[B:%.*]] = and <4 x i32> [[A:%.*]], ; CHECK-NEXT: [[D:%.*]] = ashr <4 x i32> , [[B]] ; CHECK-NEXT: ret <4 x i32> [[D]] ; @@ -63,7 +63,7 @@ define <4 x i32> @lshr_C1_add_A_C2_v4i32(<4 x i32> %A) { ; CHECK-LABEL: @lshr_C1_add_A_C2_v4i32( -; CHECK-NEXT: [[B:%.*]] = and <4 x i32> %A, +; CHECK-NEXT: [[B:%.*]] = and <4 x i32> [[A:%.*]], ; CHECK-NEXT: [[D:%.*]] = lshr <4 x i32> , [[B]] ; CHECK-NEXT: ret <4 x i32> [[D]] ; @@ -72,3 +72,54 @@ %D = lshr <4 x i32> , %C ret <4 x i32> %D } + +define <4 x i32> @shl_C1_add_A_C2_v4i32_splat(i16 %I) { +; CHECK-LABEL: @shl_C1_add_A_C2_v4i32_splat( +; CHECK-NEXT: [[A:%.*]] = zext i16 [[I:%.*]] to i32 +; CHECK-NEXT: [[B:%.*]] = insertelement <4 x i32> undef, i32 [[A]], i32 0 +; CHECK-NEXT: [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[D:%.*]] = add <4 x i32> [[C]], +; CHECK-NEXT: [[E:%.*]] = shl <4 x i32> , [[D]] +; CHECK-NEXT: ret <4 x i32> [[E]] +; + %A = zext i16 %I to i32 + %B = insertelement <4 x i32> undef, i32 %A, i32 0 + %C = shufflevector <4 x i32> %B, <4 x i32> undef, <4 x i32> zeroinitializer + %D = add <4 
x i32> %C, + %E = shl <4 x i32> , %D + ret <4 x i32> %E +} + +define <4 x i32> @ashr_C1_add_A_C2_v4i32_splat(i16 %I) { +; CHECK-LABEL: @ashr_C1_add_A_C2_v4i32_splat( +; CHECK-NEXT: [[A:%.*]] = zext i16 [[I:%.*]] to i32 +; CHECK-NEXT: [[B:%.*]] = insertelement <4 x i32> undef, i32 [[A]], i32 0 +; CHECK-NEXT: [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[D:%.*]] = add <4 x i32> [[C]], +; CHECK-NEXT: [[E:%.*]] = ashr <4 x i32> , [[D]] +; CHECK-NEXT: ret <4 x i32> [[E]] +; + %A = zext i16 %I to i32 + %B = insertelement <4 x i32> undef, i32 %A, i32 0 + %C = shufflevector <4 x i32> %B, <4 x i32> undef, <4 x i32> zeroinitializer + %D = add <4 x i32> %C, + %E = ashr <4 x i32> , %D + ret <4 x i32> %E +} + +define <4 x i32> @lshr_C1_add_A_C2_v4i32_splat(i16 %I) { +; CHECK-LABEL: @lshr_C1_add_A_C2_v4i32_splat( +; CHECK-NEXT: [[A:%.*]] = zext i16 [[I:%.*]] to i32 +; CHECK-NEXT: [[B:%.*]] = insertelement <4 x i32> undef, i32 [[A]], i32 0 +; CHECK-NEXT: [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[D:%.*]] = add <4 x i32> [[C]], +; CHECK-NEXT: [[E:%.*]] = lshr <4 x i32> , [[D]] +; CHECK-NEXT: ret <4 x i32> [[E]] +; + %A = zext i16 %I to i32 + %B = insertelement <4 x i32> undef, i32 %A, i32 0 + %C = shufflevector <4 x i32> %B, <4 x i32> undef, <4 x i32> zeroinitializer + %D = add <4 x i32> %C, + %E = lshr <4 x i32> , %D + ret <4 x i32> %E +} diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/resources-lea.s b/llvm/test/tools/llvm-mca/X86/BdVer2/resources-lea.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/resources-lea.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/resources-lea.s @@ -148,141 +148,141 @@ # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 1 0.50 leaw 0, %cx -# CHECK-NEXT: 2 1 0.50 leal 0, %ecx -# CHECK-NEXT: 2 1 0.50 leaq 0, %rcx -# CHECK-NEXT: 2 1 0.50 leaw (%eax), %cx -# CHECK-NEXT: 2 1 0.50 leal (%eax), %ecx 
-# CHECK-NEXT: 2 1 0.50 leaq (%eax), %rcx -# CHECK-NEXT: 2 1 0.50 leaw (%rax), %cx -# CHECK-NEXT: 2 1 0.50 leal (%rax), %ecx -# CHECK-NEXT: 2 1 0.50 leaq (%rax), %rcx -# CHECK-NEXT: 2 1 0.50 leaw (,%ebx), %cx -# CHECK-NEXT: 2 1 0.50 leal (,%ebx), %ecx -# CHECK-NEXT: 2 1 0.50 leaq (,%ebx), %rcx -# CHECK-NEXT: 2 1 0.50 leaw (,%rbx), %cx -# CHECK-NEXT: 2 1 0.50 leal (,%rbx), %ecx -# CHECK-NEXT: 2 1 0.50 leaq (,%rbx), %rcx -# CHECK-NEXT: 2 1 0.50 leaw (,%ebx), %cx -# CHECK-NEXT: 2 1 0.50 leal (,%ebx), %ecx -# CHECK-NEXT: 2 1 0.50 leaq (,%ebx), %rcx -# CHECK-NEXT: 2 1 0.50 leaw (,%rbx), %cx -# CHECK-NEXT: 2 1 0.50 leal (,%rbx), %ecx -# CHECK-NEXT: 2 1 0.50 leaq (,%rbx), %rcx -# CHECK-NEXT: 2 1 0.50 leaw (,%ebx,2), %cx -# CHECK-NEXT: 2 1 0.50 leal (,%ebx,2), %ecx -# CHECK-NEXT: 2 1 0.50 leaq (,%ebx,2), %rcx -# CHECK-NEXT: 2 1 0.50 leaw (,%rbx,2), %cx -# CHECK-NEXT: 2 1 0.50 leal (,%rbx,2), %ecx -# CHECK-NEXT: 2 1 0.50 leaq (,%rbx,2), %rcx -# CHECK-NEXT: 2 1 0.50 leaw (%eax,%ebx), %cx -# CHECK-NEXT: 2 1 0.50 leal (%eax,%ebx), %ecx -# CHECK-NEXT: 2 1 0.50 leaq (%eax,%ebx), %rcx -# CHECK-NEXT: 2 1 0.50 leaw (%rax,%rbx), %cx -# CHECK-NEXT: 2 1 0.50 leal (%rax,%rbx), %ecx -# CHECK-NEXT: 2 1 0.50 leaq (%rax,%rbx), %rcx -# CHECK-NEXT: 2 1 0.50 leaw (%eax,%ebx), %cx -# CHECK-NEXT: 2 1 0.50 leal (%eax,%ebx), %ecx -# CHECK-NEXT: 2 1 0.50 leaq (%eax,%ebx), %rcx -# CHECK-NEXT: 2 1 0.50 leaw (%rax,%rbx), %cx -# CHECK-NEXT: 2 1 0.50 leal (%rax,%rbx), %ecx -# CHECK-NEXT: 2 1 0.50 leaq (%rax,%rbx), %rcx -# CHECK-NEXT: 2 1 0.50 leaw (%eax,%ebx,2), %cx -# CHECK-NEXT: 2 1 0.50 leal (%eax,%ebx,2), %ecx -# CHECK-NEXT: 2 1 0.50 leaq (%eax,%ebx,2), %rcx -# CHECK-NEXT: 2 1 0.50 leaw (%rax,%rbx,2), %cx -# CHECK-NEXT: 2 1 0.50 leal (%rax,%rbx,2), %ecx -# CHECK-NEXT: 2 1 0.50 leaq (%rax,%rbx,2), %rcx -# CHECK-NEXT: 2 1 0.50 leaw -16, %cx -# CHECK-NEXT: 2 1 0.50 leal -16, %ecx -# CHECK-NEXT: 2 1 0.50 leaq -16, %rcx -# CHECK-NEXT: 2 1 0.50 leaw -16(%eax), %cx -# CHECK-NEXT: 2 1 0.50 leal -16(%eax), 
%ecx -# CHECK-NEXT: 2 1 0.50 leaq -16(%eax), %rcx -# CHECK-NEXT: 2 1 0.50 leaw -16(%rax), %cx -# CHECK-NEXT: 2 1 0.50 leal -16(%rax), %ecx -# CHECK-NEXT: 2 1 0.50 leaq -16(%rax), %rcx -# CHECK-NEXT: 2 1 0.50 leaw -16(,%ebx), %cx -# CHECK-NEXT: 2 1 0.50 leal -16(,%ebx), %ecx -# CHECK-NEXT: 2 1 0.50 leaq -16(,%ebx), %rcx -# CHECK-NEXT: 2 1 0.50 leaw -16(,%rbx), %cx -# CHECK-NEXT: 2 1 0.50 leal -16(,%rbx), %ecx -# CHECK-NEXT: 2 1 0.50 leaq -16(,%rbx), %rcx -# CHECK-NEXT: 2 1 0.50 leaw -16(,%ebx), %cx -# CHECK-NEXT: 2 1 0.50 leal -16(,%ebx), %ecx -# CHECK-NEXT: 2 1 0.50 leaq -16(,%ebx), %rcx -# CHECK-NEXT: 2 1 0.50 leaw -16(,%rbx), %cx -# CHECK-NEXT: 2 1 0.50 leal -16(,%rbx), %ecx -# CHECK-NEXT: 2 1 0.50 leaq -16(,%rbx), %rcx -# CHECK-NEXT: 2 1 0.50 leaw -16(,%ebx,2), %cx -# CHECK-NEXT: 2 1 0.50 leal -16(,%ebx,2), %ecx -# CHECK-NEXT: 2 1 0.50 leaq -16(,%ebx,2), %rcx -# CHECK-NEXT: 2 1 0.50 leaw -16(,%rbx,2), %cx -# CHECK-NEXT: 2 1 0.50 leal -16(,%rbx,2), %ecx -# CHECK-NEXT: 2 1 0.50 leaq -16(,%rbx,2), %rcx -# CHECK-NEXT: 2 1 0.50 leaw -16(%eax,%ebx), %cx -# CHECK-NEXT: 2 1 0.50 leal -16(%eax,%ebx), %ecx -# CHECK-NEXT: 2 1 0.50 leaq -16(%eax,%ebx), %rcx -# CHECK-NEXT: 2 1 0.50 leaw -16(%rax,%rbx), %cx -# CHECK-NEXT: 2 1 0.50 leal -16(%rax,%rbx), %ecx -# CHECK-NEXT: 2 1 0.50 leaq -16(%rax,%rbx), %rcx -# CHECK-NEXT: 2 1 0.50 leaw -16(%eax,%ebx), %cx -# CHECK-NEXT: 2 1 0.50 leal -16(%eax,%ebx), %ecx -# CHECK-NEXT: 2 1 0.50 leaq -16(%eax,%ebx), %rcx -# CHECK-NEXT: 2 1 0.50 leaw -16(%rax,%rbx), %cx -# CHECK-NEXT: 2 1 0.50 leal -16(%rax,%rbx), %ecx -# CHECK-NEXT: 2 1 0.50 leaq -16(%rax,%rbx), %rcx -# CHECK-NEXT: 2 1 0.50 leaw -16(%eax,%ebx,2), %cx -# CHECK-NEXT: 2 1 0.50 leal -16(%eax,%ebx,2), %ecx -# CHECK-NEXT: 2 1 0.50 leaq -16(%eax,%ebx,2), %rcx -# CHECK-NEXT: 2 1 0.50 leaw -16(%rax,%rbx,2), %cx -# CHECK-NEXT: 2 1 0.50 leal -16(%rax,%rbx,2), %ecx -# CHECK-NEXT: 2 1 0.50 leaq -16(%rax,%rbx,2), %rcx -# CHECK-NEXT: 2 1 0.50 leaw 1024, %cx -# CHECK-NEXT: 2 1 0.50 leal 1024, 
%ecx -# CHECK-NEXT: 2 1 0.50 leaq 1024, %rcx -# CHECK-NEXT: 2 1 0.50 leaw 1024(%eax), %cx -# CHECK-NEXT: 2 1 0.50 leal 1024(%eax), %ecx -# CHECK-NEXT: 2 1 0.50 leaq 1024(%eax), %rcx -# CHECK-NEXT: 2 1 0.50 leaw 1024(%rax), %cx -# CHECK-NEXT: 2 1 0.50 leal 1024(%rax), %ecx -# CHECK-NEXT: 2 1 0.50 leaq 1024(%rax), %rcx -# CHECK-NEXT: 2 1 0.50 leaw 1024(,%ebx), %cx -# CHECK-NEXT: 2 1 0.50 leal 1024(,%ebx), %ecx -# CHECK-NEXT: 2 1 0.50 leaq 1024(,%ebx), %rcx -# CHECK-NEXT: 2 1 0.50 leaw 1024(,%rbx), %cx -# CHECK-NEXT: 2 1 0.50 leal 1024(,%rbx), %ecx -# CHECK-NEXT: 2 1 0.50 leaq 1024(,%rbx), %rcx -# CHECK-NEXT: 2 1 0.50 leaw 1024(,%ebx), %cx -# CHECK-NEXT: 2 1 0.50 leal 1024(,%ebx), %ecx -# CHECK-NEXT: 2 1 0.50 leaq 1024(,%ebx), %rcx -# CHECK-NEXT: 2 1 0.50 leaw 1024(,%rbx), %cx -# CHECK-NEXT: 2 1 0.50 leal 1024(,%rbx), %ecx -# CHECK-NEXT: 2 1 0.50 leaq 1024(,%rbx), %rcx -# CHECK-NEXT: 2 1 0.50 leaw 1024(,%ebx,2), %cx -# CHECK-NEXT: 2 1 0.50 leal 1024(,%ebx,2), %ecx -# CHECK-NEXT: 2 1 0.50 leaq 1024(,%ebx,2), %rcx -# CHECK-NEXT: 2 1 0.50 leaw 1024(,%rbx,2), %cx -# CHECK-NEXT: 2 1 0.50 leal 1024(,%rbx,2), %ecx -# CHECK-NEXT: 2 1 0.50 leaq 1024(,%rbx,2), %rcx -# CHECK-NEXT: 2 1 0.50 leaw 1024(%eax,%ebx), %cx -# CHECK-NEXT: 2 1 0.50 leal 1024(%eax,%ebx), %ecx -# CHECK-NEXT: 2 1 0.50 leaq 1024(%eax,%ebx), %rcx -# CHECK-NEXT: 2 1 0.50 leaw 1024(%rax,%rbx), %cx -# CHECK-NEXT: 2 1 0.50 leal 1024(%rax,%rbx), %ecx -# CHECK-NEXT: 2 1 0.50 leaq 1024(%rax,%rbx), %rcx -# CHECK-NEXT: 2 1 0.50 leaw 1024(%eax,%ebx), %cx -# CHECK-NEXT: 2 1 0.50 leal 1024(%eax,%ebx), %ecx -# CHECK-NEXT: 2 1 0.50 leaq 1024(%eax,%ebx), %rcx -# CHECK-NEXT: 2 1 0.50 leaw 1024(%rax,%rbx), %cx -# CHECK-NEXT: 2 1 0.50 leal 1024(%rax,%rbx), %ecx -# CHECK-NEXT: 2 1 0.50 leaq 1024(%rax,%rbx), %rcx -# CHECK-NEXT: 2 1 0.50 leaw 1024(%eax,%ebx,2), %cx -# CHECK-NEXT: 2 1 0.50 leal 1024(%eax,%ebx,2), %ecx -# CHECK-NEXT: 2 1 0.50 leaq 1024(%eax,%ebx,2), %rcx -# CHECK-NEXT: 2 1 0.50 leaw 1024(%rax,%rbx,2), %cx -# 
CHECK-NEXT: 2 1 0.50 leal 1024(%rax,%rbx,2), %ecx -# CHECK-NEXT: 2 1 0.50 leaq 1024(%rax,%rbx,2), %rcx +# CHECK-NEXT: 2 1 1.50 leaw 0, %cx +# CHECK-NEXT: 1 1 1.00 leal 0, %ecx +# CHECK-NEXT: 1 1 1.00 leaq 0, %rcx +# CHECK-NEXT: 2 1 1.50 leaw (%eax), %cx +# CHECK-NEXT: 1 1 1.00 leal (%eax), %ecx +# CHECK-NEXT: 1 1 1.00 leaq (%eax), %rcx +# CHECK-NEXT: 2 1 1.50 leaw (%rax), %cx +# CHECK-NEXT: 1 1 1.00 leal (%rax), %ecx +# CHECK-NEXT: 1 1 1.00 leaq (%rax), %rcx +# CHECK-NEXT: 2 1 1.50 leaw (,%ebx), %cx +# CHECK-NEXT: 1 1 1.00 leal (,%ebx), %ecx +# CHECK-NEXT: 1 1 1.00 leaq (,%ebx), %rcx +# CHECK-NEXT: 2 1 1.50 leaw (,%rbx), %cx +# CHECK-NEXT: 1 1 1.00 leal (,%rbx), %ecx +# CHECK-NEXT: 1 1 1.00 leaq (,%rbx), %rcx +# CHECK-NEXT: 2 1 1.50 leaw (,%ebx), %cx +# CHECK-NEXT: 1 1 1.00 leal (,%ebx), %ecx +# CHECK-NEXT: 1 1 1.00 leaq (,%ebx), %rcx +# CHECK-NEXT: 2 1 1.50 leaw (,%rbx), %cx +# CHECK-NEXT: 1 1 1.00 leal (,%rbx), %ecx +# CHECK-NEXT: 1 1 1.00 leaq (,%rbx), %rcx +# CHECK-NEXT: 2 1 1.50 leaw (,%ebx,2), %cx +# CHECK-NEXT: 1 2 1.00 leal (,%ebx,2), %ecx +# CHECK-NEXT: 1 2 1.00 leaq (,%ebx,2), %rcx +# CHECK-NEXT: 2 1 1.50 leaw (,%rbx,2), %cx +# CHECK-NEXT: 1 2 1.00 leal (,%rbx,2), %ecx +# CHECK-NEXT: 1 2 1.00 leaq (,%rbx,2), %rcx +# CHECK-NEXT: 2 1 1.50 leaw (%eax,%ebx), %cx +# CHECK-NEXT: 1 1 1.00 leal (%eax,%ebx), %ecx +# CHECK-NEXT: 1 1 1.00 leaq (%eax,%ebx), %rcx +# CHECK-NEXT: 2 1 1.50 leaw (%rax,%rbx), %cx +# CHECK-NEXT: 1 1 1.00 leal (%rax,%rbx), %ecx +# CHECK-NEXT: 1 1 1.00 leaq (%rax,%rbx), %rcx +# CHECK-NEXT: 2 1 1.50 leaw (%eax,%ebx), %cx +# CHECK-NEXT: 1 1 1.00 leal (%eax,%ebx), %ecx +# CHECK-NEXT: 1 1 1.00 leaq (%eax,%ebx), %rcx +# CHECK-NEXT: 2 1 1.50 leaw (%rax,%rbx), %cx +# CHECK-NEXT: 1 1 1.00 leal (%rax,%rbx), %ecx +# CHECK-NEXT: 1 1 1.00 leaq (%rax,%rbx), %rcx +# CHECK-NEXT: 2 1 1.50 leaw (%eax,%ebx,2), %cx +# CHECK-NEXT: 1 2 1.00 leal (%eax,%ebx,2), %ecx +# CHECK-NEXT: 1 2 1.00 leaq (%eax,%ebx,2), %rcx +# CHECK-NEXT: 2 1 1.50 leaw (%rax,%rbx,2), %cx +# 
CHECK-NEXT: 1 2 1.00 leal (%rax,%rbx,2), %ecx +# CHECK-NEXT: 1 2 1.00 leaq (%rax,%rbx,2), %rcx +# CHECK-NEXT: 2 1 1.50 leaw -16, %cx +# CHECK-NEXT: 1 1 1.00 leal -16, %ecx +# CHECK-NEXT: 1 1 1.00 leaq -16, %rcx +# CHECK-NEXT: 2 1 1.50 leaw -16(%eax), %cx +# CHECK-NEXT: 1 1 1.00 leal -16(%eax), %ecx +# CHECK-NEXT: 1 1 1.00 leaq -16(%eax), %rcx +# CHECK-NEXT: 2 1 1.50 leaw -16(%rax), %cx +# CHECK-NEXT: 1 1 1.00 leal -16(%rax), %ecx +# CHECK-NEXT: 1 1 1.00 leaq -16(%rax), %rcx +# CHECK-NEXT: 2 1 1.50 leaw -16(,%ebx), %cx +# CHECK-NEXT: 1 1 1.00 leal -16(,%ebx), %ecx +# CHECK-NEXT: 1 1 1.00 leaq -16(,%ebx), %rcx +# CHECK-NEXT: 2 1 1.50 leaw -16(,%rbx), %cx +# CHECK-NEXT: 1 1 1.00 leal -16(,%rbx), %ecx +# CHECK-NEXT: 1 1 1.00 leaq -16(,%rbx), %rcx +# CHECK-NEXT: 2 1 1.50 leaw -16(,%ebx), %cx +# CHECK-NEXT: 1 1 1.00 leal -16(,%ebx), %ecx +# CHECK-NEXT: 1 1 1.00 leaq -16(,%ebx), %rcx +# CHECK-NEXT: 2 1 1.50 leaw -16(,%rbx), %cx +# CHECK-NEXT: 1 1 1.00 leal -16(,%rbx), %ecx +# CHECK-NEXT: 1 1 1.00 leaq -16(,%rbx), %rcx +# CHECK-NEXT: 2 1 1.50 leaw -16(,%ebx,2), %cx +# CHECK-NEXT: 1 2 1.00 leal -16(,%ebx,2), %ecx +# CHECK-NEXT: 1 2 1.00 leaq -16(,%ebx,2), %rcx +# CHECK-NEXT: 2 1 1.50 leaw -16(,%rbx,2), %cx +# CHECK-NEXT: 1 2 1.00 leal -16(,%rbx,2), %ecx +# CHECK-NEXT: 1 2 1.00 leaq -16(,%rbx,2), %rcx +# CHECK-NEXT: 2 1 1.50 leaw -16(%eax,%ebx), %cx +# CHECK-NEXT: 1 2 1.00 leal -16(%eax,%ebx), %ecx +# CHECK-NEXT: 1 2 1.00 leaq -16(%eax,%ebx), %rcx +# CHECK-NEXT: 2 1 1.50 leaw -16(%rax,%rbx), %cx +# CHECK-NEXT: 1 2 1.00 leal -16(%rax,%rbx), %ecx +# CHECK-NEXT: 1 2 1.00 leaq -16(%rax,%rbx), %rcx +# CHECK-NEXT: 2 1 1.50 leaw -16(%eax,%ebx), %cx +# CHECK-NEXT: 1 2 1.00 leal -16(%eax,%ebx), %ecx +# CHECK-NEXT: 1 2 1.00 leaq -16(%eax,%ebx), %rcx +# CHECK-NEXT: 2 1 1.50 leaw -16(%rax,%rbx), %cx +# CHECK-NEXT: 1 2 1.00 leal -16(%rax,%rbx), %ecx +# CHECK-NEXT: 1 2 1.00 leaq -16(%rax,%rbx), %rcx +# CHECK-NEXT: 2 1 1.50 leaw -16(%eax,%ebx,2), %cx +# CHECK-NEXT: 1 2 1.00 leal 
-16(%eax,%ebx,2), %ecx +# CHECK-NEXT: 1 2 1.00 leaq -16(%eax,%ebx,2), %rcx +# CHECK-NEXT: 2 1 1.50 leaw -16(%rax,%rbx,2), %cx +# CHECK-NEXT: 1 2 1.00 leal -16(%rax,%rbx,2), %ecx +# CHECK-NEXT: 1 2 1.00 leaq -16(%rax,%rbx,2), %rcx +# CHECK-NEXT: 2 1 1.50 leaw 1024, %cx +# CHECK-NEXT: 1 1 1.00 leal 1024, %ecx +# CHECK-NEXT: 1 1 1.00 leaq 1024, %rcx +# CHECK-NEXT: 2 1 1.50 leaw 1024(%eax), %cx +# CHECK-NEXT: 1 1 1.00 leal 1024(%eax), %ecx +# CHECK-NEXT: 1 1 1.00 leaq 1024(%eax), %rcx +# CHECK-NEXT: 2 1 1.50 leaw 1024(%rax), %cx +# CHECK-NEXT: 1 1 1.00 leal 1024(%rax), %ecx +# CHECK-NEXT: 1 1 1.00 leaq 1024(%rax), %rcx +# CHECK-NEXT: 2 1 1.50 leaw 1024(,%ebx), %cx +# CHECK-NEXT: 1 1 1.00 leal 1024(,%ebx), %ecx +# CHECK-NEXT: 1 1 1.00 leaq 1024(,%ebx), %rcx +# CHECK-NEXT: 2 1 1.50 leaw 1024(,%rbx), %cx +# CHECK-NEXT: 1 1 1.00 leal 1024(,%rbx), %ecx +# CHECK-NEXT: 1 1 1.00 leaq 1024(,%rbx), %rcx +# CHECK-NEXT: 2 1 1.50 leaw 1024(,%ebx), %cx +# CHECK-NEXT: 1 1 1.00 leal 1024(,%ebx), %ecx +# CHECK-NEXT: 1 1 1.00 leaq 1024(,%ebx), %rcx +# CHECK-NEXT: 2 1 1.50 leaw 1024(,%rbx), %cx +# CHECK-NEXT: 1 1 1.00 leal 1024(,%rbx), %ecx +# CHECK-NEXT: 1 1 1.00 leaq 1024(,%rbx), %rcx +# CHECK-NEXT: 2 1 1.50 leaw 1024(,%ebx,2), %cx +# CHECK-NEXT: 1 2 1.00 leal 1024(,%ebx,2), %ecx +# CHECK-NEXT: 1 2 1.00 leaq 1024(,%ebx,2), %rcx +# CHECK-NEXT: 2 1 1.50 leaw 1024(,%rbx,2), %cx +# CHECK-NEXT: 1 2 1.00 leal 1024(,%rbx,2), %ecx +# CHECK-NEXT: 1 2 1.00 leaq 1024(,%rbx,2), %rcx +# CHECK-NEXT: 2 1 1.50 leaw 1024(%eax,%ebx), %cx +# CHECK-NEXT: 1 2 1.00 leal 1024(%eax,%ebx), %ecx +# CHECK-NEXT: 1 2 1.00 leaq 1024(%eax,%ebx), %rcx +# CHECK-NEXT: 2 1 1.50 leaw 1024(%rax,%rbx), %cx +# CHECK-NEXT: 1 2 1.00 leal 1024(%rax,%rbx), %ecx +# CHECK-NEXT: 1 2 1.00 leaq 1024(%rax,%rbx), %rcx +# CHECK-NEXT: 2 1 1.50 leaw 1024(%eax,%ebx), %cx +# CHECK-NEXT: 1 2 1.00 leal 1024(%eax,%ebx), %ecx +# CHECK-NEXT: 1 2 1.00 leaq 1024(%eax,%ebx), %rcx +# CHECK-NEXT: 2 1 1.50 leaw 1024(%rax,%rbx), %cx +# CHECK-NEXT: 1 2 
1.00 leal 1024(%rax,%rbx), %ecx +# CHECK-NEXT: 1 2 1.00 leaq 1024(%rax,%rbx), %rcx +# CHECK-NEXT: 2 1 1.50 leaw 1024(%eax,%ebx,2), %cx +# CHECK-NEXT: 1 2 1.00 leal 1024(%eax,%ebx,2), %ecx +# CHECK-NEXT: 1 2 1.00 leaq 1024(%eax,%ebx,2), %rcx +# CHECK-NEXT: 2 1 1.50 leaw 1024(%rax,%rbx,2), %cx +# CHECK-NEXT: 1 2 1.00 leal 1024(%rax,%rbx,2), %ecx +# CHECK-NEXT: 1 2 1.00 leaq 1024(%rax,%rbx,2), %rcx # CHECK: Resources: # CHECK-NEXT: [0.0] - PdAGLU01 @@ -311,142 +311,142 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] -# CHECK-NEXT: - - - - - 67.50 67.50 - - - - - - - - - - - - - - - - +# CHECK-NEXT: - - - - - 157.50 157.50 - - - - - - - - - - - - - - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions: -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw 0, %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal 0, %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq 0, %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw (%eax), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal (%eax), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq (%eax), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw (%rax), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal (%rax), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq (%rax), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw (,%ebx), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal (,%ebx), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq (,%ebx), %rcx -# CHECK-NEXT: - - 
- - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw (,%rbx), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal (,%rbx), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq (,%rbx), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw (,%ebx), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal (,%ebx), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq (,%ebx), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw (,%rbx), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal (,%rbx), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq (,%rbx), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw (,%ebx,2), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal (,%ebx,2), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq (,%ebx,2), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw (,%rbx,2), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal (,%rbx,2), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq (,%rbx,2), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw (%eax,%ebx), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal (%eax,%ebx), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq (%eax,%ebx), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw (%rax,%rbx), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal (%rax,%rbx), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq (%rax,%rbx), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw (%eax,%ebx), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal (%eax,%ebx), %ecx -# 
CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq (%eax,%ebx), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw (%rax,%rbx), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal (%rax,%rbx), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq (%rax,%rbx), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw (%eax,%ebx,2), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal (%eax,%ebx,2), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq (%eax,%ebx,2), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw (%rax,%rbx,2), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal (%rax,%rbx,2), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq (%rax,%rbx,2), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw -16, %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal -16, %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq -16, %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw -16(%eax), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal -16(%eax), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq -16(%eax), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw -16(%rax), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal -16(%rax), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq -16(%rax), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw -16(,%ebx), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal -16(,%ebx), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq -16(,%ebx), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - 
- - - - - leaw -16(,%rbx), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal -16(,%rbx), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq -16(,%rbx), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw -16(,%ebx), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal -16(,%ebx), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq -16(,%ebx), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw -16(,%rbx), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal -16(,%rbx), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq -16(,%rbx), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw -16(,%ebx,2), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal -16(,%ebx,2), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq -16(,%ebx,2), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw -16(,%rbx,2), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal -16(,%rbx,2), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq -16(,%rbx,2), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw -16(%eax,%ebx), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal -16(%eax,%ebx), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq -16(%eax,%ebx), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw -16(%rax,%rbx), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal -16(%rax,%rbx), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq -16(%rax,%rbx), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw -16(%eax,%ebx), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - 
leal -16(%eax,%ebx), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq -16(%eax,%ebx), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw -16(%rax,%rbx), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal -16(%rax,%rbx), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq -16(%rax,%rbx), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw -16(%eax,%ebx,2), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal -16(%eax,%ebx,2), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq -16(%eax,%ebx,2), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw -16(%rax,%rbx,2), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal -16(%rax,%rbx,2), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq -16(%rax,%rbx,2), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw 1024, %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal 1024, %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq 1024, %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw 1024(%eax), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal 1024(%eax), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq 1024(%eax), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw 1024(%rax), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal 1024(%rax), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq 1024(%rax), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw 1024(,%ebx), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal 1024(,%ebx), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq 
1024(,%ebx), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw 1024(,%rbx), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal 1024(,%rbx), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq 1024(,%rbx), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw 1024(,%ebx), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal 1024(,%ebx), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq 1024(,%ebx), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw 1024(,%rbx), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal 1024(,%rbx), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq 1024(,%rbx), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw 1024(,%ebx,2), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal 1024(,%ebx,2), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq 1024(,%ebx,2), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw 1024(,%rbx,2), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal 1024(,%rbx,2), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq 1024(,%rbx,2), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw 1024(%eax,%ebx), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal 1024(%eax,%ebx), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq 1024(%eax,%ebx), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw 1024(%rax,%rbx), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal 1024(%rax,%rbx), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq 1024(%rax,%rbx), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - 
- - leaw 1024(%eax,%ebx), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal 1024(%eax,%ebx), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq 1024(%eax,%ebx), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw 1024(%rax,%rbx), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal 1024(%rax,%rbx), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq 1024(%rax,%rbx), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw 1024(%eax,%ebx,2), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal 1024(%eax,%ebx,2), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq 1024(%eax,%ebx,2), %rcx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaw 1024(%rax,%rbx,2), %cx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leal 1024(%rax,%rbx,2), %ecx -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq 1024(%rax,%rbx,2), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw 0, %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal 0, %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq 0, %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw (%eax), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal (%eax), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq (%eax), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw (%rax), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal (%rax), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq (%rax), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw (,%ebx), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal (,%ebx), %ecx +# 
CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq (,%ebx), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw (,%rbx), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal (,%rbx), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq (,%rbx), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw (,%ebx), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal (,%ebx), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq (,%ebx), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw (,%rbx), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal (,%rbx), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq (,%rbx), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw (,%ebx,2), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal (,%ebx,2), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq (,%ebx,2), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw (,%rbx,2), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal (,%rbx,2), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq (,%rbx,2), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw (%eax,%ebx), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal (%eax,%ebx), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq (%eax,%ebx), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw (%rax,%rbx), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal (%rax,%rbx), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq (%rax,%rbx), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw 
(%eax,%ebx), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal (%eax,%ebx), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq (%eax,%ebx), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw (%rax,%rbx), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal (%rax,%rbx), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq (%rax,%rbx), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw (%eax,%ebx,2), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal (%eax,%ebx,2), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq (%eax,%ebx,2), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw (%rax,%rbx,2), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal (%rax,%rbx,2), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq (%rax,%rbx,2), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw -16, %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal -16, %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq -16, %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw -16(%eax), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal -16(%eax), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq -16(%eax), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw -16(%rax), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal -16(%rax), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq -16(%rax), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw -16(,%ebx), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal -16(,%ebx), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 
- - - - - - - - - - - - - - - - leaq -16(,%ebx), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw -16(,%rbx), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal -16(,%rbx), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq -16(,%rbx), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw -16(,%ebx), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal -16(,%ebx), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq -16(,%ebx), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw -16(,%rbx), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal -16(,%rbx), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq -16(,%rbx), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw -16(,%ebx,2), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal -16(,%ebx,2), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq -16(,%ebx,2), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw -16(,%rbx,2), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal -16(,%rbx,2), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq -16(,%rbx,2), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw -16(%eax,%ebx), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal -16(%eax,%ebx), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq -16(%eax,%ebx), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw -16(%rax,%rbx), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal -16(%rax,%rbx), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq -16(%rax,%rbx), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - 
- - - - - - - - - - leaw -16(%eax,%ebx), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal -16(%eax,%ebx), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq -16(%eax,%ebx), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw -16(%rax,%rbx), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal -16(%rax,%rbx), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq -16(%rax,%rbx), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw -16(%eax,%ebx,2), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal -16(%eax,%ebx,2), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq -16(%eax,%ebx,2), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw -16(%rax,%rbx,2), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal -16(%rax,%rbx,2), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq -16(%rax,%rbx,2), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw 1024, %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal 1024, %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq 1024, %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw 1024(%eax), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal 1024(%eax), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq 1024(%eax), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw 1024(%rax), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal 1024(%rax), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq 1024(%rax), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw 1024(,%ebx), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - 
- - - - - - - leal 1024(,%ebx), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq 1024(,%ebx), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw 1024(,%rbx), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal 1024(,%rbx), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq 1024(,%rbx), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw 1024(,%ebx), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal 1024(,%ebx), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq 1024(,%ebx), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw 1024(,%rbx), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal 1024(,%rbx), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq 1024(,%rbx), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw 1024(,%ebx,2), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal 1024(,%ebx,2), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq 1024(,%ebx,2), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw 1024(,%rbx,2), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal 1024(,%rbx,2), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq 1024(,%rbx,2), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw 1024(%eax,%ebx), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal 1024(%eax,%ebx), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq 1024(%eax,%ebx), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw 1024(%rax,%rbx), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal 1024(%rax,%rbx), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - 
- - - - - - - - - - leaq 1024(%rax,%rbx), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw 1024(%eax,%ebx), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal 1024(%eax,%ebx), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq 1024(%eax,%ebx), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw 1024(%rax,%rbx), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal 1024(%rax,%rbx), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq 1024(%rax,%rbx), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw 1024(%eax,%ebx,2), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal 1024(%eax,%ebx,2), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq 1024(%eax,%ebx,2), %rcx +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - - - - leaw 1024(%rax,%rbx,2), %cx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leal 1024(%rax,%rbx,2), %ecx +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq 1024(%rax,%rbx,2), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/vbroadcast-operand-latency.s b/llvm/test/tools/llvm-mca/X86/BdVer2/vbroadcast-operand-latency.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/vbroadcast-operand-latency.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/vbroadcast-operand-latency.s @@ -7,10 +7,10 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 200 # CHECK-NEXT: Total Cycles: 206 -# CHECK-NEXT: Total uOps: 400 +# CHECK-NEXT: Total uOps: 300 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.94 +# CHECK-NEXT: uOps Per Cycle: 1.46 # CHECK-NEXT: IPC: 0.97 # CHECK-NEXT: Block RThroughput: 2.0 @@ -23,7 +23,7 @@ # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 1 0.50 leaq 8(%rsp,%rdi,2), %rax +# CHECK-NEXT: 1 2 1.00 leaq 8(%rsp,%rdi,2), 
%rax # CHECK-NEXT: 2 6 2.00 * vbroadcastss (%rax), %ymm0 # CHECK: Resources: @@ -53,22 +53,22 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] -# CHECK-NEXT: 0.50 0.50 - - - 0.50 0.50 - 2.00 2.00 - - - - 1.00 1.00 - - - 0.50 0.50 - - +# CHECK-NEXT: 0.50 0.50 - - - 1.00 1.00 - 2.00 2.00 - - - - 1.00 1.00 - - - 0.50 0.50 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions: -# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - leaq 8(%rsp,%rdi,2), %rax +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - leaq 8(%rsp,%rdi,2), %rax # CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 2.00 - - - - 1.00 1.00 - - - 0.50 0.50 - - vbroadcastss (%rax), %ymm0 # CHECK: Timeline view: # CHECK-NEXT: 012 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeER . . . leaq 8(%rsp,%rdi,2), %rax +# CHECK: [0,0] DeeER. . . leaq 8(%rsp,%rdi,2), %rax # CHECK-NEXT: [0,1] DeeeeeeER . . vbroadcastss (%rax), %ymm0 -# CHECK-NEXT: [1,0] .DeE----R . . leaq 8(%rsp,%rdi,2), %rax +# CHECK-NEXT: [1,0] DeeE----R . . leaq 8(%rsp,%rdi,2), %rax # CHECK-NEXT: [1,1] .DeeeeeeER. . vbroadcastss (%rax), %ymm0 -# CHECK-NEXT: [2,0] . DeE----R. . leaq 8(%rsp,%rdi,2), %rax +# CHECK-NEXT: [2,0] .D=eeE---R. . leaq 8(%rsp,%rdi,2), %rax # CHECK-NEXT: [2,1] . D==eeeeeeER vbroadcastss (%rax), %ymm0 # CHECK: Average Wait times (based on the timeline view): @@ -78,6 +78,6 @@ # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 3 1.0 1.0 2.7 leaq 8(%rsp,%rdi,2), %rax -# CHECK-NEXT: 1. 3 1.7 0.7 0.0 vbroadcastss (%rax), %ymm0 -# CHECK-NEXT: 3 1.3 0.8 1.3 +# CHECK-NEXT: 0. 3 1.3 1.3 2.3 leaq 8(%rsp,%rdi,2), %rax +# CHECK-NEXT: 1. 
3 1.7 1.0 0.0 vbroadcastss (%rax), %ymm0 +# CHECK-NEXT: 3 1.5 1.2 1.2 diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-clzero.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-clzero.s --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-clzero.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-clzero.s @@ -12,7 +12,7 @@ # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 5 0.50 U clzero +# CHECK-NEXT: 1 5 0.50 U clzero # CHECK: Resources: # CHECK-NEXT: [0] - SBDivider @@ -30,4 +30,4 @@ # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: -# CHECK-NEXT: - - - - - - 0.50 0.50 clzero +# CHECK-NEXT: - - - - - - 0.50 0.50 clzero diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-clzero.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-clzero.s --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-clzero.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-clzero.s @@ -12,7 +12,7 @@ # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 8 0.50 U clzero +# CHECK-NEXT: 1 8 0.50 U clzero # CHECK: Resources: # CHECK-NEXT: [0] - ZnAGU0 @@ -30,7 +30,7 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: diff --git a/llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s b/llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s --- a/llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s +++ b/llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s @@ -64,6 +64,11 @@ # ZNVER1-NEXT: IPC: 0.25 # ZNVER1-NEXT: Block RThroughput: 0.8 +# ZNVER2: Dispatch Width: 4 +# ZNVER2-NEXT: uOps Per Cycle: 0.38 +# ZNVER2-NEXT: IPC: 0.25 +# ZNVER2-NEXT: Block RThroughput: 0.8 + # 
ALL: Instruction Info: # ALL-NEXT: [1]: #uOps # ALL-NEXT: [2]: Latency @@ -92,6 +97,9 @@ # ZNVER1-NEXT: 1 1 0.25 addl %edi, %esi # ZNVER1-NEXT: 2 5 0.50 * bextrl %esi, (%rdi), %eax +# ZNVER2-NEXT: 1 1 0.25 addl %edi, %esi +# ZNVER2-NEXT: 2 5 0.33 * bextrl %esi, (%rdi), %eax + # ALL: Timeline view: # BDVER2-NEXT: Index 012345678 @@ -100,6 +108,7 @@ # HASWELL-NEXT: Index 0123456789 # SKYLAKE-NEXT: Index 0123456789 # ZNVER1-NEXT: Index 01234567 +# ZNVER2-NEXT: Index 01234567 # BDVER2: [0,0] DeER . . addl %edi, %esi # BDVER2-NEXT: [0,1] DeeeeeeER bextrl %esi, (%rdi), %eax @@ -119,6 +128,9 @@ # ZNVER1: [0,0] DeER . . addl %edi, %esi # ZNVER1-NEXT: [0,1] DeeeeeER bextrl %esi, (%rdi), %eax +# ZNVER2: [0,0] DeER . . addl %edi, %esi +# ZNVER2-NEXT: [0,1] DeeeeeER bextrl %esi, (%rdi), %eax + # ALL: Average Wait times (based on the timeline view): # ALL-NEXT: [0]: Executions # ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue diff --git a/llvm/test/tools/llvm-mca/X86/bzhi-read-after-ld.s b/llvm/test/tools/llvm-mca/X86/bzhi-read-after-ld.s --- a/llvm/test/tools/llvm-mca/X86/bzhi-read-after-ld.s +++ b/llvm/test/tools/llvm-mca/X86/bzhi-read-after-ld.s @@ -39,6 +39,11 @@ # ZNVER1-NEXT: IPC: 0.25 # ZNVER1-NEXT: Block RThroughput: 0.8 +# ZNVER2: Dispatch Width: 4 +# ZNVER2-NEXT: uOps Per Cycle: 0.38 +# ZNVER2-NEXT: IPC: 0.25 +# ZNVER2-NEXT: Block RThroughput: 0.8 + # ALL: Instruction Info: # ALL-NEXT: [1]: #uOps # ALL-NEXT: [2]: Latency @@ -54,6 +59,7 @@ # HASWELL-NEXT: 2 6 0.50 * bzhil %esi, (%rdi), %eax # SKYLAKE-NEXT: 2 6 0.50 * bzhil %esi, (%rdi), %eax # ZNVER1-NEXT: 2 5 0.50 * bzhil %esi, (%rdi), %eax +# ZNVER2-NEXT: 2 5 0.33 * bzhil %esi, (%rdi), %eax # ALL: Timeline view: @@ -61,6 +67,7 @@ # HASWELL-NEXT: Index 012345678 # SKYLAKE-NEXT: Index 012345678 # ZNVER1-NEXT: Index 01234567 +# ZNVER2-NEXT: Index 01234567 # BDWELL: [0,0] DeER . . addl %edi, %esi # BDWELL-NEXT: [0,1] DeeeeeeER bzhil %esi, (%rdi), %eax @@ -74,6 +81,9 @@ # ZNVER1: [0,0] DeER . . 
addl %edi, %esi # ZNVER1-NEXT: [0,1] DeeeeeER bzhil %esi, (%rdi), %eax +# ZNVER2: [0,0] DeER . . addl %edi, %esi +# ZNVER2-NEXT: [0,1] DeeeeeER bzhil %esi, (%rdi), %eax + # ALL: Average Wait times (based on the timeline view): # ALL-NEXT: [0]: Executions # ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue diff --git a/llvm/test/tools/llvm-mca/X86/read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/read-after-ld-1.s --- a/llvm/test/tools/llvm-mca/X86/read-after-ld-1.s +++ b/llvm/test/tools/llvm-mca/X86/read-after-ld-1.s @@ -114,7 +114,7 @@ # ZNVER1-NEXT: Index 0123456789 # ZNVER2-NEXT: 0123456789 -# ZNVER2-NEXT: Index 0123456789 +# ZNVER2-NEXT: Index 0123456789 0 # BARCELONA: [0,0] DeeeeeeeeeeeeeeER . vdivps %xmm0, %xmm1, %xmm1 # BARCELONA-NEXT: [0,1] D========eeeeeeeeeER vaddps (%rax), %xmm1, %xmm1 @@ -140,7 +140,7 @@ # ZNVER1: [0,0] DeeeeeeeeeeeeeeeER . vdivps %xmm0, %xmm1, %xmm1 # ZNVER1-NEXT: [0,1] D=======eeeeeeeeeeER vaddps (%rax), %xmm1, %xmm1 -# ZNVER2: [0,0] DeeeeeeeeeeeeeeeER . vdivps %xmm0, %xmm1, %xmm1 +# ZNVER2: [0,0] DeeeeeeeeeeeeeeeER . vdivps %xmm0, %xmm1, %xmm1 # ZNVER2-NEXT: [0,1] D========eeeeeeeeeeER vaddps (%rax), %xmm1, %xmm1 # ALL: Average Wait times (based on the timeline view): diff --git a/llvm/test/tools/llvm-mca/X86/read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/read-after-ld-2.s --- a/llvm/test/tools/llvm-mca/X86/read-after-ld-2.s +++ b/llvm/test/tools/llvm-mca/X86/read-after-ld-2.s @@ -31,6 +31,9 @@ # ZNVER1-NEXT: Total Cycles: 110 # ZNVER1-NEXT: Total uOps: 400 +# ZNVER2-NEXT: Total Cycles: 110 +# ZNVER2-NEXT: Total uOps: 400 + # BDWELL: Dispatch Width: 4 # BDWELL-NEXT: uOps Per Cycle: 3.52 # BDWELL-NEXT: IPC: 2.82 @@ -237,46 +240,46 @@ # ZNVER1-NEXT: [9,2] . . DeE-------R addq $32, %r8 # ZNVER1-NEXT: [9,3] . . D=eE------R cmpl %edi, %edx -# ZNVER2: [0,0] DeER . . . . addl $1, %edx -# ZNVER2-NEXT: [0,1] DeeeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0 -# ZNVER2-NEXT: [0,2] DeE-------R . . 
addq $32, %r8 -# ZNVER2-NEXT: [0,3] D=eE------R . . cmpl %edi, %edx -# ZNVER2-NEXT: [1,0] .DeE------R . . addl $1, %edx -# ZNVER2-NEXT: [1,1] .DeeeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0 -# ZNVER2-NEXT: [1,2] .DeE-------R . . addq $32, %r8 -# ZNVER2-NEXT: [1,3] .D=eE------R . . cmpl %edi, %edx -# ZNVER2-NEXT: [2,0] . DeE------R . . addl $1, %edx -# ZNVER2-NEXT: [2,1] . DeeeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0 -# ZNVER2-NEXT: [2,2] . DeE-------R . . addq $32, %r8 -# ZNVER2-NEXT: [2,3] . D=eE------R . . cmpl %edi, %edx -# ZNVER2-NEXT: [3,0] . DeE------R . . addl $1, %edx -# ZNVER2-NEXT: [3,1] . DeeeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0 -# ZNVER2-NEXT: [3,2] . DeE-------R . . addq $32, %r8 -# ZNVER2-NEXT: [3,3] . D=eE------R . . cmpl %edi, %edx -# ZNVER2-NEXT: [4,0] . DeE------R . . addl $1, %edx -# ZNVER2-NEXT: [4,1] . DeeeeeeeeER. . vpaddd (%r8), %ymm0, %ymm0 -# ZNVER2-NEXT: [4,2] . DeE-------R. . addq $32, %r8 -# ZNVER2-NEXT: [4,3] . D=eE------R. . cmpl %edi, %edx -# ZNVER2-NEXT: [5,0] . DeE------R. . addl $1, %edx -# ZNVER2-NEXT: [5,1] . DeeeeeeeeER . vpaddd (%r8), %ymm0, %ymm0 -# ZNVER2-NEXT: [5,2] . DeE-------R . addq $32, %r8 -# ZNVER2-NEXT: [5,3] . D=eE------R . cmpl %edi, %edx -# ZNVER2-NEXT: [6,0] . .DeE------R . addl $1, %edx -# ZNVER2-NEXT: [6,1] . .DeeeeeeeeER . vpaddd (%r8), %ymm0, %ymm0 -# ZNVER2-NEXT: [6,2] . .DeE-------R . addq $32, %r8 -# ZNVER2-NEXT: [6,3] . .D=eE------R . cmpl %edi, %edx -# ZNVER2-NEXT: [7,0] . . DeE------R . addl $1, %edx -# ZNVER2-NEXT: [7,1] . . DeeeeeeeeER . vpaddd (%r8), %ymm0, %ymm0 -# ZNVER2-NEXT: [7,2] . . DeE-------R . addq $32, %r8 -# ZNVER2-NEXT: [7,3] . . D=eE------R . cmpl %edi, %edx -# ZNVER2-NEXT: [8,0] . . DeE------R . addl $1, %edx -# ZNVER2-NEXT: [8,1] . . DeeeeeeeeER. vpaddd (%r8), %ymm0, %ymm0 -# ZNVER2-NEXT: [8,2] . . DeE-------R. addq $32, %r8 -# ZNVER2-NEXT: [8,3] . . D=eE------R. cmpl %edi, %edx -# ZNVER2-NEXT: [9,0] . . DeE------R. addl $1, %edx -# ZNVER2-NEXT: [9,1] . . 
DeeeeeeeeER vpaddd (%r8), %ymm0, %ymm0 -# ZNVER2-NEXT: [9,2] . . DeE-------R addq $32, %r8 -# ZNVER2-NEXT: [9,3] . . D=eE------R cmpl %edi, %edx +# ZNVER2: [0,0] DeER . . . . addl $1, %edx +# ZNVER2-NEXT: [0,1] DeeeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0 +# ZNVER2-NEXT: [0,2] DeE-------R . . addq $32, %r8 +# ZNVER2-NEXT: [0,3] D=eE------R . . cmpl %edi, %edx +# ZNVER2-NEXT: [1,0] .DeE------R . . addl $1, %edx +# ZNVER2-NEXT: [1,1] .DeeeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0 +# ZNVER2-NEXT: [1,2] .DeE-------R . . addq $32, %r8 +# ZNVER2-NEXT: [1,3] .D=eE------R . . cmpl %edi, %edx +# ZNVER2-NEXT: [2,0] . DeE------R . . addl $1, %edx +# ZNVER2-NEXT: [2,1] . DeeeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0 +# ZNVER2-NEXT: [2,2] . DeE-------R . . addq $32, %r8 +# ZNVER2-NEXT: [2,3] . D=eE------R . . cmpl %edi, %edx +# ZNVER2-NEXT: [3,0] . DeE------R . . addl $1, %edx +# ZNVER2-NEXT: [3,1] . DeeeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0 +# ZNVER2-NEXT: [3,2] . DeE-------R . . addq $32, %r8 +# ZNVER2-NEXT: [3,3] . D=eE------R . . cmpl %edi, %edx +# ZNVER2-NEXT: [4,0] . DeE------R . . addl $1, %edx +# ZNVER2-NEXT: [4,1] . DeeeeeeeeER. . vpaddd (%r8), %ymm0, %ymm0 +# ZNVER2-NEXT: [4,2] . DeE-------R. . addq $32, %r8 +# ZNVER2-NEXT: [4,3] . D=eE------R. . cmpl %edi, %edx +# ZNVER2-NEXT: [5,0] . DeE------R. . addl $1, %edx +# ZNVER2-NEXT: [5,1] . DeeeeeeeeER . vpaddd (%r8), %ymm0, %ymm0 +# ZNVER2-NEXT: [5,2] . DeE-------R . addq $32, %r8 +# ZNVER2-NEXT: [5,3] . D=eE------R . cmpl %edi, %edx +# ZNVER2-NEXT: [6,0] . .DeE------R . addl $1, %edx +# ZNVER2-NEXT: [6,1] . .DeeeeeeeeER . vpaddd (%r8), %ymm0, %ymm0 +# ZNVER2-NEXT: [6,2] . .DeE-------R . addq $32, %r8 +# ZNVER2-NEXT: [6,3] . .D=eE------R . cmpl %edi, %edx +# ZNVER2-NEXT: [7,0] . . DeE------R . addl $1, %edx +# ZNVER2-NEXT: [7,1] . . DeeeeeeeeER . vpaddd (%r8), %ymm0, %ymm0 +# ZNVER2-NEXT: [7,2] . . DeE-------R . addq $32, %r8 +# ZNVER2-NEXT: [7,3] . . D=eE------R . cmpl %edi, %edx +# ZNVER2-NEXT: [8,0] . . DeE------R . 
addl $1, %edx +# ZNVER2-NEXT: [8,1] . . DeeeeeeeeER. vpaddd (%r8), %ymm0, %ymm0 +# ZNVER2-NEXT: [8,2] . . DeE-------R. addq $32, %r8 +# ZNVER2-NEXT: [8,3] . . D=eE------R. cmpl %edi, %edx +# ZNVER2-NEXT: [9,0] . . DeE------R. addl $1, %edx +# ZNVER2-NEXT: [9,1] . . DeeeeeeeeER vpaddd (%r8), %ymm0, %ymm0 +# ZNVER2-NEXT: [9,2] . . DeE-------R addq $32, %r8 +# ZNVER2-NEXT: [9,3] . . D=eE------R cmpl %edi, %edx # ALL: Average Wait times (based on the timeline view): # ALL-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s b/llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s --- a/llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s +++ b/llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s @@ -89,7 +89,7 @@ # ZNVER1-NEXT: [3] Maximum number of used buffer entries. # ZNVER1-NEXT: [4] Total number of buffer entries. -# ZNVER2: Scheduler's queue usage: +# ZNVER2: Scheduler's queue usage: # ZNVER2-NEXT: [1] Resource name. # ZNVER2-NEXT: [2] Average number of used buffer entries. # ZNVER2-NEXT: [3] Maximum number of used buffer entries. @@ -134,3 +134,8 @@ # ZNVER1-NEXT: ZnAGU 0 0 28 # ZNVER1-NEXT: ZnALU 0 1 56 # ZNVER1-NEXT: ZnFPU 0 0 36 + +# ZNVER2: [1] [2] [3] [4] +# ZNVER2-NEXT: Zn2AGU 0 0 28 +# ZNVER2-NEXT: Zn2ALU 0 1 64 +# ZNVER2-NEXT: Zn2FPU 0 0 36 diff --git a/llvm/test/tools/llvm-mca/X86/sqrt-rsqrt-rcp-memop.s b/llvm/test/tools/llvm-mca/X86/sqrt-rsqrt-rcp-memop.s --- a/llvm/test/tools/llvm-mca/X86/sqrt-rsqrt-rcp-memop.s +++ b/llvm/test/tools/llvm-mca/X86/sqrt-rsqrt-rcp-memop.s @@ -35,7 +35,7 @@ # BARCELONA-NEXT: 0123456789 # BARCELONA-NEXT: Index 0123456789 0123 -# BDVER2-NEXT: 01234567 +# BDVER2-NEXT: 012345678 # BDVER2-NEXT: Index 0123456789 # BROADWELL-NEXT: 0123456789 @@ -59,8 +59,8 @@ # BARCELONA: [0,0] DeER . . . . . leaq 8(%rsp,%rdi,2), %rax # BARCELONA-NEXT: [0,1] D=eeeeeeeeeeeeeeeeeeeeER sqrtss (%rax), %xmm1 -# BDVER2: [0,0] DeER . . . . 
leaq 8(%rsp,%rdi,2), %rax -# BDVER2-NEXT: [0,1] D=eeeeeeeeeeeeeeER sqrtss (%rax), %xmm1 +# BDVER2: [0,0] DeeER. . . . leaq 8(%rsp,%rdi,2), %rax +# BDVER2-NEXT: [0,1] D==eeeeeeeeeeeeeeER sqrtss (%rax), %xmm1 # BROADWELL: [0,0] DeER . . . . leaq 8(%rsp,%rdi,2), %rax # BROADWELL-NEXT: [0,1] D=eeeeeeeeeeeeeeeeER sqrtss (%rax), %xmm1 @@ -92,8 +92,8 @@ # BARCELONA-NEXT: 1. 1 2.0 0.0 0.0 sqrtss (%rax), %xmm1 # BARCELONA-NEXT: 1 1.5 0.5 0.0 -# BDVER2-NEXT: 1. 1 2.0 0.0 0.0 sqrtss (%rax), %xmm1 -# BDVER2-NEXT: 1 1.5 0.5 0.0 +# BDVER2-NEXT: 1. 1 3.0 0.0 0.0 sqrtss (%rax), %xmm1 +# BDVER2-NEXT: 1 2.0 0.5 0.0 # BROADWELL-NEXT: 1. 1 2.0 0.0 0.0 sqrtss (%rax), %xmm1 # BROADWELL-NEXT: 1 1.5 0.5 0.0 @@ -120,7 +120,7 @@ # BARCELONA-NEXT: 0123456789 0 # BARCELONA-NEXT: Index 0123456789 0123456789 -# BDVER2-NEXT: 01234567 +# BDVER2-NEXT: 012345678 # BDVER2-NEXT: Index 0123456789 # BROADWELL-NEXT: 0123456789 @@ -144,8 +144,8 @@ # BARCELONA: [0,0] DeER . . . . . . leaq 8(%rsp,%rdi,2), %rax # BARCELONA-NEXT: [0,1] D=eeeeeeeeeeeeeeeeeeeeeeeeeeeER sqrtsd (%rax), %xmm1 -# BDVER2: [0,0] DeER . . . . leaq 8(%rsp,%rdi,2), %rax -# BDVER2-NEXT: [0,1] D=eeeeeeeeeeeeeeER sqrtsd (%rax), %xmm1 +# BDVER2: [0,0] DeeER. . . . leaq 8(%rsp,%rdi,2), %rax +# BDVER2-NEXT: [0,1] D==eeeeeeeeeeeeeeER sqrtsd (%rax), %xmm1 # BROADWELL: [0,0] DeER . . . . . leaq 8(%rsp,%rdi,2), %rax # BROADWELL-NEXT: [0,1] D=eeeeeeeeeeeeeeeeeeeeeER sqrtsd (%rax), %xmm1 @@ -177,8 +177,8 @@ # BARCELONA-NEXT: 1. 1 2.0 0.0 0.0 sqrtsd (%rax), %xmm1 # BARCELONA-NEXT: 1 1.5 0.5 0.0 -# BDVER2-NEXT: 1. 1 2.0 0.0 0.0 sqrtsd (%rax), %xmm1 -# BDVER2-NEXT: 1 1.5 0.5 0.0 +# BDVER2-NEXT: 1. 1 3.0 0.0 0.0 sqrtsd (%rax), %xmm1 +# BDVER2-NEXT: 1 2.0 0.5 0.0 # BROADWELL-NEXT: 1. 
1 2.0 0.0 0.0 sqrtsd (%rax), %xmm1 # BROADWELL-NEXT: 1 1.5 0.5 0.0 @@ -203,7 +203,7 @@ # ALL: Timeline view: # BARCELONA-NEXT: 01234 -# BDVER2-NEXT: 0123 +# BDVER2-NEXT: 01234 # BROADWELL-NEXT: 0123 # BTVER2-NEXT: 01 # HASWELL-NEXT: 0123 @@ -216,8 +216,8 @@ # BARCELONA: [0,0] DeER . . . leaq 8(%rsp,%rdi,2), %rax # BARCELONA-NEXT: [0,1] D=eeeeeeeeeeeER rsqrtss (%rax), %xmm1 -# BDVER2: [0,0] DeER . . . leaq 8(%rsp,%rdi,2), %rax -# BDVER2-NEXT: [0,1] D=eeeeeeeeeeER rsqrtss (%rax), %xmm1 +# BDVER2: [0,0] DeeER. . . leaq 8(%rsp,%rdi,2), %rax +# BDVER2-NEXT: [0,1] D==eeeeeeeeeeER rsqrtss (%rax), %xmm1 # BROADWELL: [0,0] DeER . . . leaq 8(%rsp,%rdi,2), %rax # BROADWELL-NEXT: [0,1] D=eeeeeeeeeeER rsqrtss (%rax), %xmm1 @@ -249,8 +249,8 @@ # BARCELONA-NEXT: 1. 1 2.0 0.0 0.0 rsqrtss (%rax), %xmm1 # BARCELONA-NEXT: 1 1.5 0.5 0.0 -# BDVER2-NEXT: 1. 1 2.0 0.0 0.0 rsqrtss (%rax), %xmm1 -# BDVER2-NEXT: 1 1.5 0.5 0.0 +# BDVER2-NEXT: 1. 1 3.0 0.0 0.0 rsqrtss (%rax), %xmm1 +# BDVER2-NEXT: 1 2.0 0.5 0.0 # BROADWELL-NEXT: 1. 1 2.0 0.0 0.0 rsqrtss (%rax), %xmm1 # BROADWELL-NEXT: 1 1.5 0.5 0.0 @@ -275,7 +275,7 @@ # ALL: Timeline view: # BARCELONA-NEXT: 01234 -# BDVER2-NEXT: 0123 +# BDVER2-NEXT: 01234 # BROADWELL-NEXT: 0123 # BTVER2-NEXT: 01 # HASWELL-NEXT: 0123 @@ -288,8 +288,8 @@ # BARCELONA: [0,0] DeER . . . leaq 8(%rsp,%rdi,2), %rax # BARCELONA-NEXT: [0,1] D=eeeeeeeeeeeER rcpss (%rax), %xmm1 -# BDVER2: [0,0] DeER . . . leaq 8(%rsp,%rdi,2), %rax -# BDVER2-NEXT: [0,1] D=eeeeeeeeeeER rcpss (%rax), %xmm1 +# BDVER2: [0,0] DeeER. . . leaq 8(%rsp,%rdi,2), %rax +# BDVER2-NEXT: [0,1] D==eeeeeeeeeeER rcpss (%rax), %xmm1 # BROADWELL: [0,0] DeER . . . leaq 8(%rsp,%rdi,2), %rax # BROADWELL-NEXT: [0,1] D=eeeeeeeeeeER rcpss (%rax), %xmm1 @@ -321,8 +321,8 @@ # BARCELONA-NEXT: 1. 1 2.0 0.0 0.0 rcpss (%rax), %xmm1 # BARCELONA-NEXT: 1 1.5 0.5 0.0 -# BDVER2-NEXT: 1. 1 2.0 0.0 0.0 rcpss (%rax), %xmm1 -# BDVER2-NEXT: 1 1.5 0.5 0.0 +# BDVER2-NEXT: 1. 
1 3.0 0.0 0.0 rcpss (%rax), %xmm1 +# BDVER2-NEXT: 1 2.0 0.5 0.0 # BROADWELL-NEXT: 1. 1 2.0 0.0 0.0 rcpss (%rax), %xmm1 # BROADWELL-NEXT: 1 1.5 0.5 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s --- a/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s +++ b/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s @@ -191,4 +191,3 @@ # ZNVER2-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 # ZNVER2-NEXT: 1 1.0 0.5 0.0 - diff --git a/mlir/include/mlir/Conversion/LinalgToSPIRV/LinalgToSPIRV.h b/mlir/include/mlir/Conversion/LinalgToSPIRV/LinalgToSPIRV.h new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/Conversion/LinalgToSPIRV/LinalgToSPIRV.h @@ -0,0 +1,29 @@ +//===- LinalgToSPIRV.h - Linalg to SPIR-V dialect conversion ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides patterns for Linalg to SPIR-V dialect conversion. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_CONVERSION_LINALGTOSPIRV_LINALGTOSPIRV_H +#define MLIR_CONVERSION_LINALGTOSPIRV_LINALGTOSPIRV_H + +namespace mlir { +class MLIRContext; +class OwningRewritePatternList; +class SPIRVTypeConverter; + +/// Appends to a pattern list additional patterns for translating Linalg ops to +/// SPIR-V ops. 
+void populateLinalgToSPIRVPatterns(MLIRContext *context, + SPIRVTypeConverter &typeConverter, + OwningRewritePatternList &patterns); + +} // namespace mlir + +#endif // MLIR_CONVERSION_LINALGTOSPIRV_LINALGTOSPIRV_H diff --git a/mlir/include/mlir/Conversion/LinalgToSPIRV/LinalgToSPIRVPass.h b/mlir/include/mlir/Conversion/LinalgToSPIRV/LinalgToSPIRVPass.h new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/Conversion/LinalgToSPIRV/LinalgToSPIRVPass.h @@ -0,0 +1,25 @@ +//===- LinalgToSPIRVPass.h - Linalg to SPIR-V conversion pass --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides a pass for Linalg to SPIR-V dialect conversion. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_CONVERSION_STANDARDTOSPIRV_LINALGTOSPIRVPASS_H +#define MLIR_CONVERSION_STANDARDTOSPIRV_LINALGTOSPIRVPASS_H + +#include "mlir/Pass/Pass.h" + +namespace mlir { + +/// Creates and returns a pass to convert Linalg ops to SPIR-V ops. 
+std::unique_ptr> createLinalgToSPIRVPass(); + +} // namespace mlir + +#endif // MLIR_CONVERSION_STANDARDTOSPIRV_LINALGTOSPIRVPASS_H diff --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVAtomicOps.td b/mlir/include/mlir/Dialect/SPIRV/SPIRVAtomicOps.td --- a/mlir/include/mlir/Dialect/SPIRV/SPIRVAtomicOps.td +++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVAtomicOps.td @@ -25,6 +25,7 @@ SPV_ScopeAttr:$memory_scope, SPV_MemorySemanticsAttr:$semantics ); + let results = (outs SPV_Integer:$result ); @@ -42,9 +43,19 @@ SPV_MemorySemanticsAttr:$semantics, SPV_Integer:$value ); + let results = (outs SPV_Integer:$result ); + + let builders = [ + OpBuilder< + [{Builder *builder, OperationState &state, Value pointer, + ::mlir::spirv::Scope scope, ::mlir::spirv::MemorySemantics memory, + Value value}], + [{build(builder, state, value.getType(), pointer, scope, memory, value);}] + > + ]; } // ----- diff --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVControlFlowOps.td b/mlir/include/mlir/Dialect/SPIRV/SPIRVControlFlowOps.td --- a/mlir/include/mlir/Dialect/SPIRV/SPIRVControlFlowOps.td +++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVControlFlowOps.td @@ -446,14 +446,22 @@ let regions = (region AnyRegion:$body); let extraClassDeclaration = [{ - // Returns the selection header block. + /// Returns the selection header block. Block *getHeaderBlock(); - // Returns the selection merge block. + /// Returns the selection merge block. Block *getMergeBlock(); - // Adds a selection merge block containing one spv._merge op. + /// Adds a selection merge block containing one spv._merge op. void addMergeBlock(); + + /// Creates a spv.selection op for `if (<condition>) then { <thenBody> }` + /// with `builder`. `builder`'s insertion point will be left right after the + /// newly inserted spv.selection op.
+ static SelectionOp createIfThen( + Location loc, Value condition, + llvm::function_ref thenBody, + OpBuilder *builder); }]; let hasOpcode = 0; diff --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVLowering.h b/mlir/include/mlir/Dialect/SPIRV/SPIRVLowering.h --- a/mlir/include/mlir/Dialect/SPIRV/SPIRVLowering.h +++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVLowering.h @@ -58,6 +58,8 @@ OwningRewritePatternList &patterns); namespace spirv { +class AccessChainOp; + class SPIRVConversionTarget : public ConversionTarget { public: /// Creates a SPIR-V conversion target for the given target environment. @@ -90,6 +92,16 @@ Value getBuiltinVariableValue(Operation *op, BuiltIn builtin, OpBuilder &builder); +/// Performs the index computation to get to the element at `indices` of the +/// memory pointed to by `basePtr`, using the layout map of `baseType`. + +// TODO(ravishankarm) : This method assumes that the `baseType` is a MemRefType +// with AffineMap that has static strides. Extend to handle dynamic strides. +spirv::AccessChainOp getElementPtr(SPIRVTypeConverter &typeConverter, + MemRefType baseType, Value basePtr, + ArrayRef indices, Location loc, + OpBuilder &builder); + /// Sets the InterfaceVarABIAttr and EntryPointABIAttr for a function and its /// arguments. LogicalResult setABIAttrs(FuncOp funcOp, EntryPointABIAttr entryPointInfo, diff --git a/mlir/include/mlir/Dialect/SPIRV/TargetAndABI.h b/mlir/include/mlir/Dialect/SPIRV/TargetAndABI.h --- a/mlir/include/mlir/Dialect/SPIRV/TargetAndABI.h +++ b/mlir/include/mlir/Dialect/SPIRV/TargetAndABI.h @@ -54,6 +54,12 @@ /// target environment (SPIR-V 1.0 with Shader capability and no extra /// extensions) if not provided. TargetEnvAttr lookupTargetEnvOrDefault(Operation *op); + +/// Queries the local workgroup size from entry point ABI on the nearest +/// function-like op containing the given `op`. Returns null attribute if not +/// found. 
+DenseIntElementsAttr lookupLocalWorkGroupSize(Operation *op); + } // namespace spirv } // namespace mlir diff --git a/mlir/lib/Conversion/CMakeLists.txt b/mlir/lib/Conversion/CMakeLists.txt --- a/mlir/lib/Conversion/CMakeLists.txt +++ b/mlir/lib/Conversion/CMakeLists.txt @@ -4,6 +4,7 @@ add_subdirectory(GPUToROCDL) add_subdirectory(GPUToSPIRV) add_subdirectory(LinalgToLLVM) +add_subdirectory(LinalgToSPIRV) add_subdirectory(LoopsToGPU) add_subdirectory(LoopToStandard) add_subdirectory(StandardToLLVM) diff --git a/mlir/lib/Conversion/LinalgToSPIRV/CMakeLists.txt b/mlir/lib/Conversion/LinalgToSPIRV/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/mlir/lib/Conversion/LinalgToSPIRV/CMakeLists.txt @@ -0,0 +1,16 @@ +add_llvm_library(MLIRLinalgToSPIRVTransforms + LinalgToSPIRV.cpp + LinalgToSPIRVPass.cpp + + ADDITIONAL_HEADER_DIRS + ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/SPIRV + ${MLIR_MAIN_INCLUDE_DIR}/mlir/IR + ) + +target_link_libraries(MLIRLinalgToSPIRVTransforms + MLIRIR + MLIRLinalgOps + MLIRPass + MLIRSPIRV + MLIRSupport + ) diff --git a/mlir/lib/Conversion/LinalgToSPIRV/LinalgToSPIRV.cpp b/mlir/lib/Conversion/LinalgToSPIRV/LinalgToSPIRV.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/Conversion/LinalgToSPIRV/LinalgToSPIRV.cpp @@ -0,0 +1,264 @@ +//===- LinalgToSPIRV.cpp - Linalg to SPIR-V dialect conversion ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Conversion/LinalgToSPIRV/LinalgToSPIRV.h" +#include "mlir/Dialect/Linalg/IR/LinalgOps.h" +#include "mlir/Dialect/SPIRV/SPIRVDialect.h" +#include "mlir/Dialect/SPIRV/SPIRVLowering.h" +#include "mlir/Dialect/SPIRV/SPIRVOps.h" +#include "mlir/Dialect/StandardOps/Ops.h" +#include "mlir/Dialect/Utils/StructuredOpsUtils.h" +#include "mlir/IR/AffineExpr.h" +#include "mlir/IR/Matchers.h" + +using namespace mlir; + +//===----------------------------------------------------------------------===// +// Utilities +//===----------------------------------------------------------------------===// + +/// Returns true if all of the given `op`'s operands and results are of memref +/// types. +static bool areAllValuesMemref(Operation *op) { + auto isOfMemrefType = [](Value val) { + return val.getType().isa(); + }; + + return llvm::all_of(op->getOperands(), isOfMemrefType) && + llvm::all_of(op->getResults(), isOfMemrefType); +} + +/// Returns true if the given Linalg `iterators` consists of exactly one reduction iterator. +static bool isLinalgSingleReductionIterator(ArrayAttr iterators) { + if (iterators.getValue().size() != 1) + return false; + + auto iterator = (*iterators.begin()).dyn_cast(); + if (iterator.getValue() != getReductionIteratorTypeName()) + return false; + + return true; +} + +/// Returns a `Value` containing the `dim`-th component of the SPIR-V +/// local invocation ID. This function will create necessary operations with +/// `builder` at the proper region containing `op`.
+static Value getLocalInvocationDimSize(Operation *op, int dim, Location loc, + OpBuilder *builder) { + assert(dim >= 0 && dim < 3 && "local invocation only has three dimensions"); + Value invocation = spirv::getBuiltinVariableValue( + op, spirv::BuiltIn::LocalInvocationId, *builder); + Type xType = invocation.getType().cast().getElementType(); + return builder->create( + loc, xType, invocation, builder->getI32ArrayAttr({dim})); +} + +namespace { +enum class BinaryOpKind { + Unknown, + IAdd, +}; +} + +/// Returns the binary op kind if the given linalg.generic op has the following +/// body: +/// +/// ``` +/// linalg.generic ... { +/// ^bb(%a: <type>, %b: <type>): +/// %0 = <binary-op> %a, %b: <type> +/// linalg.yield %0: <type> +/// } +/// ``` +static BinaryOpKind getScalarBinaryOpKind(linalg::GenericOp op) { + auto &region = op.region(); + if (region.empty() || !has_single_element(region.getBlocks())) + return BinaryOpKind::Unknown; + + Block &block = region.front(); + if (block.getNumArguments() != 2 || + !block.getArgument(0).getType().isIntOrFloat() || + !block.getArgument(1).getType().isIntOrFloat()) + return BinaryOpKind::Unknown; + + auto &ops = block.getOperations(); + if (!has_single_element(block.without_terminator())) + return BinaryOpKind::Unknown; + + using mlir::matchers::m_Val; + auto a = m_Val(block.getArgument(0)); + auto b = m_Val(block.getArgument(1)); + + auto addPattern = m_Op(m_Op(a, b)); + if (addPattern.match(&ops.back())) + return BinaryOpKind::IAdd; + + return BinaryOpKind::Unknown; +} + +//===----------------------------------------------------------------------===// +// Reduction (single workgroup) +//===----------------------------------------------------------------------===// + +namespace { + +/// A pattern to convert a linalg.generic op to SPIR-V ops under the condition +/// that the linalg.generic op is performing reduction with a workload size that +/// can fit in one workgroup.
+class SingleWorkgroupReduction final + : public SPIRVOpLowering { +public: + using SPIRVOpLowering::SPIRVOpLowering; + + PatternMatchResult + matchAndRewrite(linalg::GenericOp genericOp, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override; +}; + +} // namespace + +PatternMatchResult SingleWorkgroupReduction::matchAndRewrite( + linalg::GenericOp genericOp, ArrayRef operands, + ConversionPatternRewriter &rewriter) const { + Operation *op = genericOp.getOperation(); + + // Make sure the linalg.generic is working on memrefs. + if (!areAllValuesMemref(op)) + return matchFailure(); + + // Make sure this is a reduction with one input and one output. + if (genericOp.args_in().getZExtValue() != 1 || + genericOp.args_out().getZExtValue() != 1) + return matchFailure(); + + auto originalInputType = op->getOperand(0).getType().cast(); + auto originalOutputType = op->getOperand(1).getType().cast(); + + // Make sure the original input has one dimension. + if (!originalInputType.hasStaticShape() || originalInputType.getRank() != 1) + return matchFailure(); + // Make sure the original output has one element. + if (!originalOutputType.hasStaticShape() || + originalOutputType.getNumElements() != 1) + return matchFailure(); + + if (!isLinalgSingleReductionIterator(genericOp.iterator_types())) + return matchFailure(); + + if (genericOp.indexing_maps().getValue().size() != 2) + return matchFailure(); + + auto inputMap = genericOp.indexing_maps().getValue()[0].cast(); + auto outputMap = + genericOp.indexing_maps().getValue()[1].cast(); + // The indexing map for the input should be `(i) -> (i)`. + if (inputMap.getValue() != + AffineMap::get(1, 0, {getAffineDimExpr(0, op->getContext())})) + return matchFailure(); + // The indexing map for the output should be `(i) -> (0)`.
+ if (outputMap.getValue() != + AffineMap::get(1, 0, {getAffineConstantExpr(0, op->getContext())})) + return matchFailure(); + + auto binaryOpKind = getScalarBinaryOpKind(genericOp); + if (binaryOpKind == BinaryOpKind::Unknown) + return matchFailure(); + + // Query the shader interface for local workgroup size to make sure the + // invocation configuration fits with the input memref's shape. + DenseIntElementsAttr localSize = spirv::lookupLocalWorkGroupSize(genericOp); + if (!localSize) + return matchFailure(); + + if ((*localSize.begin()).getSExtValue() != originalInputType.getDimSize(0)) + return matchFailure(); + if (llvm::any_of(llvm::drop_begin(localSize.getIntValues(), 1), + [](const APInt &size) { return !size.isOneValue(); })) + return matchFailure(); + + // TODO(antiagainst): Query the target environment to make sure the current + // workload fits in a local workgroup. + + Value convertedInput = operands[0], convertedOutput = operands[1]; + Location loc = genericOp.getLoc(); + + // Get the invocation ID. + Value x = getLocalInvocationDimSize(genericOp, /*dim=*/0, loc, &rewriter); + + // TODO(antiagainst): Load to Workgroup storage class first. + + // Get the input element accessed by this invocation. + Value inputElementPtr = spirv::getElementPtr( + typeConverter, originalInputType, convertedInput, {x}, loc, rewriter); + Value inputElement = rewriter.create(loc, inputElementPtr); + + // Perform the group reduction operation. 
+ Value groupOperation; +#define CREATE_GROUP_NON_UNIFORM_BIN_OP(opKind, spvOp) \ + case BinaryOpKind::opKind: { \ + groupOperation = rewriter.create( \ + loc, originalInputType.getElementType(), spirv::Scope::Subgroup, \ + spirv::GroupOperation::Reduce, inputElement, \ + /*cluster_size=*/ArrayRef()); \ + } break + switch (binaryOpKind) { + CREATE_GROUP_NON_UNIFORM_BIN_OP(IAdd, GroupNonUniformIAddOp); + case BinaryOpKind::Unknown: + llvm_unreachable("failed to reject match"); + } +#undef CREATE_GROUP_NON_UNIFORM_BIN_OP + + // Get the output element accessed by this reduction. + Value zero = spirv::ConstantOp::getZero( + typeConverter.getIndexType(rewriter.getContext()), loc, &rewriter); + SmallVector zeroIndices(originalOutputType.getRank(), zero); + Value outputElementPtr = + spirv::getElementPtr(typeConverter, originalOutputType, convertedOutput, + zeroIndices, loc, rewriter); + + // Write out the final reduction result. This should be only conducted by one + // invocation. We use spv.GroupNonUniformElect to find the invocation with the + // lowest ID. + // + // ``` + // if (spv.GroupNonUniformElect) { output = ... 
} + // ``` + + Value condition = rewriter.create( + loc, spirv::Scope::Subgroup); + + auto createAtomicOp = [&](OpBuilder *builder) { +#define CREATE_ATOMIC_BIN_OP(opKind, spvOp) \ + case BinaryOpKind::opKind: { \ + builder->create(loc, outputElementPtr, spirv::Scope::Device, \ + spirv::MemorySemantics::AcquireRelease, \ + groupOperation); \ + } break + switch (binaryOpKind) { + CREATE_ATOMIC_BIN_OP(IAdd, AtomicIAddOp); + case BinaryOpKind::Unknown: + llvm_unreachable("failed to reject match"); + } +#undef CREATE_ATOMIC_BIN_OP + }; + + spirv::SelectionOp::createIfThen(loc, condition, createAtomicOp, &rewriter); + + rewriter.eraseOp(genericOp); + return matchSuccess(); +} + +//===----------------------------------------------------------------------===// +// Pattern population +//===----------------------------------------------------------------------===// + +void mlir::populateLinalgToSPIRVPatterns(MLIRContext *context, + SPIRVTypeConverter &typeConverter, + OwningRewritePatternList &patterns) { + patterns.insert(context, typeConverter); +} diff --git a/mlir/lib/Conversion/LinalgToSPIRV/LinalgToSPIRVPass.cpp b/mlir/lib/Conversion/LinalgToSPIRV/LinalgToSPIRVPass.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/Conversion/LinalgToSPIRV/LinalgToSPIRVPass.cpp @@ -0,0 +1,51 @@ +//===- LinalgToSPIRVPass.cpp - Linalg to SPIR-V conversion pass -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Conversion/LinalgToSPIRV/LinalgToSPIRVPass.h" +#include "mlir/Conversion/LinalgToSPIRV/LinalgToSPIRV.h" +#include "mlir/Dialect/SPIRV/SPIRVDialect.h" +#include "mlir/Dialect/SPIRV/SPIRVLowering.h" +#include "mlir/Pass/Pass.h" + +using namespace mlir; + +namespace { +/// A pass converting MLIR Linalg ops into SPIR-V ops. +class LinalgToSPIRVPass : public ModulePass { + void runOnModule() override; +}; +} // namespace + +void LinalgToSPIRVPass::runOnModule() { + MLIRContext *context = &getContext(); + ModuleOp module = getModule(); + + SPIRVTypeConverter typeConverter; + OwningRewritePatternList patterns; + populateLinalgToSPIRVPatterns(context, typeConverter, patterns); + populateBuiltinFuncToSPIRVPatterns(context, typeConverter, patterns); + + auto targetEnv = spirv::lookupTargetEnvOrDefault(module); + std::unique_ptr target = + spirv::SPIRVConversionTarget::get(targetEnv, context); + + // Allow builtin ops. 
+ target->addLegalOp(); + target->addDynamicallyLegalOp( + [&](FuncOp op) { return typeConverter.isSignatureLegal(op.getType()); }); + + if (failed(applyFullConversion(module, *target, patterns))) + return signalPassFailure(); +} + +std::unique_ptr> mlir::createLinalgToSPIRVPass() { + return std::make_unique(); +} + +static PassRegistration + pass("convert-linalg-to-spirv", "Convert Linalg ops to SPIR-V ops"); diff --git a/mlir/lib/Conversion/StandardToSPIRV/ConvertStandardToSPIRV.cpp b/mlir/lib/Conversion/StandardToSPIRV/ConvertStandardToSPIRV.cpp --- a/mlir/lib/Conversion/StandardToSPIRV/ConvertStandardToSPIRV.cpp +++ b/mlir/lib/Conversion/StandardToSPIRV/ConvertStandardToSPIRV.cpp @@ -141,48 +141,6 @@ } // namespace -//===----------------------------------------------------------------------===// -// Utility functions for operation conversion -//===----------------------------------------------------------------------===// - -/// Performs the index computation to get to the element pointed to by -/// `indices` using the layout map of `baseType`. - -// TODO(ravishankarm) : This method assumes that the `origBaseType` is a -// MemRefType with AffineMap that has static strides. Handle dynamic strides -static spirv::AccessChainOp getElementPtr(OpBuilder &builder, - SPIRVTypeConverter &typeConverter, - Location loc, MemRefType origBaseType, - Value basePtr, - ArrayRef indices) { - // Get base and offset of the MemRefType and verify they are static. 
- int64_t offset; - SmallVector strides; - if (failed(getStridesAndOffset(origBaseType, strides, offset)) || - llvm::is_contained(strides, MemRefType::getDynamicStrideOrOffset())) { - return nullptr; - } - - auto indexType = typeConverter.getIndexType(builder.getContext()); - - Value ptrLoc = nullptr; - assert(indices.size() == strides.size()); - for (auto index : enumerate(indices)) { - Value strideVal = builder.create( - loc, indexType, IntegerAttr::get(indexType, strides[index.index()])); - Value update = builder.create(loc, strideVal, index.value()); - ptrLoc = - (ptrLoc ? builder.create(loc, ptrLoc, update).getResult() - : update); - } - SmallVector linearizedIndices; - // Add a '0' at the start to index into the struct. - linearizedIndices.push_back(builder.create( - loc, indexType, IntegerAttr::get(indexType, 0))); - linearizedIndices.push_back(ptrLoc); - return builder.create(loc, basePtr, linearizedIndices); -} - //===----------------------------------------------------------------------===// // ConstantOp with composite type. 
//===----------------------------------------------------------------------===// @@ -331,9 +289,9 @@ LoadOpConversion::matchAndRewrite(LoadOp loadOp, ArrayRef operands, ConversionPatternRewriter &rewriter) const { LoadOpOperandAdaptor loadOperands(operands); - auto loadPtr = getElementPtr(rewriter, typeConverter, loadOp.getLoc(), - loadOp.memref().getType().cast(), - loadOperands.memref(), loadOperands.indices()); + auto loadPtr = spirv::getElementPtr( + typeConverter, loadOp.memref().getType().cast(), + loadOperands.memref(), loadOperands.indices(), loadOp.getLoc(), rewriter); rewriter.replaceOpWithNewOp(loadOp, loadPtr); return matchSuccess(); } @@ -374,10 +332,10 @@ StoreOpConversion::matchAndRewrite(StoreOp storeOp, ArrayRef operands, ConversionPatternRewriter &rewriter) const { StoreOpOperandAdaptor storeOperands(operands); - auto storePtr = - getElementPtr(rewriter, typeConverter, storeOp.getLoc(), - storeOp.memref().getType().cast(), - storeOperands.memref(), storeOperands.indices()); + auto storePtr = spirv::getElementPtr( + typeConverter, storeOp.memref().getType().cast(), + storeOperands.memref(), storeOperands.indices(), storeOp.getLoc(), + rewriter); rewriter.replaceOpWithNewOp(storeOp, storePtr, storeOperands.value()); return matchSuccess(); diff --git a/mlir/lib/Dialect/SPIRV/SPIRVLowering.cpp b/mlir/lib/Dialect/SPIRV/SPIRVLowering.cpp --- a/mlir/lib/Dialect/SPIRV/SPIRVLowering.cpp +++ b/mlir/lib/Dialect/SPIRV/SPIRVLowering.cpp @@ -292,6 +292,41 @@ return builder.create(op->getLoc(), ptr); } +//===----------------------------------------------------------------------===// +// Index calculation +//===----------------------------------------------------------------------===// + +spirv::AccessChainOp mlir::spirv::getElementPtr( + SPIRVTypeConverter &typeConverter, MemRefType baseType, Value basePtr, + ArrayRef indices, Location loc, OpBuilder &builder) { + // Get base and offset of the MemRefType and verify they are static. 
+ int64_t offset; + SmallVector strides; + if (failed(getStridesAndOffset(baseType, strides, offset)) || + llvm::is_contained(strides, MemRefType::getDynamicStrideOrOffset())) { + return nullptr; + } + + auto indexType = typeConverter.getIndexType(builder.getContext()); + + Value ptrLoc = nullptr; + assert(indices.size() == strides.size()); + for (auto index : enumerate(indices)) { + Value strideVal = builder.create( + loc, indexType, IntegerAttr::get(indexType, strides[index.index()])); + Value update = builder.create(loc, strideVal, index.value()); + ptrLoc = + (ptrLoc ? builder.create(loc, ptrLoc, update).getResult() + : update); + } + SmallVector linearizedIndices; + // Add a '0' at the start to index into the struct. + linearizedIndices.push_back(builder.create( + loc, indexType, IntegerAttr::get(indexType, 0))); + linearizedIndices.push_back(ptrLoc); + return builder.create(loc, basePtr, linearizedIndices); +} + //===----------------------------------------------------------------------===// // Set ABI attributes for lowering entry functions. //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp b/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp --- a/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp +++ b/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp @@ -2755,6 +2755,38 @@ builder.create(getLoc()); } +spirv::SelectionOp spirv::SelectionOp::createIfThen( + Location loc, Value condition, + llvm::function_ref thenBody, OpBuilder *builder) { + auto selectionControl = builder->getI32IntegerAttr( + static_cast(spirv::SelectionControl::None)); + auto selectionOp = builder->create(loc, selectionControl); + + selectionOp.addMergeBlock(); + Block *mergeBlock = selectionOp.getMergeBlock(); + Block *thenBlock = nullptr; + + // Build the "then" block. 
+ { + OpBuilder::InsertionGuard guard(*builder); + thenBlock = builder->createBlock(mergeBlock); + thenBody(builder); + builder->create(loc, mergeBlock); + } + + // Build the header block. + { + OpBuilder::InsertionGuard guard(*builder); + builder->createBlock(thenBlock); + builder->create( + loc, condition, thenBlock, + /*trueArguments=*/ArrayRef(), mergeBlock, + /*falseArguments=*/ArrayRef()); + } + + return selectionOp; +} + namespace { // Blocks from the given `spv.selection` operation must satisfy the following // layout: diff --git a/mlir/lib/Dialect/SPIRV/TargetAndABI.cpp b/mlir/lib/Dialect/SPIRV/TargetAndABI.cpp --- a/mlir/lib/Dialect/SPIRV/TargetAndABI.cpp +++ b/mlir/lib/Dialect/SPIRV/TargetAndABI.cpp @@ -9,6 +9,7 @@ #include "mlir/Dialect/SPIRV/TargetAndABI.h" #include "mlir/Dialect/SPIRV/SPIRVTypes.h" #include "mlir/IR/Builders.h" +#include "mlir/IR/FunctionSupport.h" #include "mlir/IR/Operation.h" using namespace mlir; @@ -62,3 +63,16 @@ return attr; return getDefaultTargetEnv(op->getContext()); } + +DenseIntElementsAttr spirv::lookupLocalWorkGroupSize(Operation *op) { + while (op && !op->hasTrait()) + op = op->getParentOp(); + if (!op) + return {}; + + if (auto attr = op->getAttrOfType( + spirv::getEntryPointABIAttrName())) + return attr.local_size(); + + return {}; +} diff --git a/mlir/test/Conversion/LinalgToSPIRV/linalg-to-spirv.mlir b/mlir/test/Conversion/LinalgToSPIRV/linalg-to-spirv.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Conversion/LinalgToSPIRV/linalg-to-spirv.mlir @@ -0,0 +1,162 @@ +// RUN: mlir-opt -split-input-file -convert-linalg-to-spirv -canonicalize -verify-diagnostics %s -o - | FileCheck %s + +//===----------------------------------------------------------------------===// +// Single workgroup reduction +//===----------------------------------------------------------------------===// + +#single_workgroup_reduction_trait = { + args_in = 1, + args_out = 1, + iterator_types = ["reduction"], + indexing_maps = [ + 
affine_map<(i) -> (i)>, + affine_map<(i) -> (0)> + ] +} + +module attributes { + spv.target_env = { + version = 3 : i32, + extensions = [], + capabilities = [1: i32, 63: i32] // Shader, GroupNonUniformArithmetic + } +} { + +// CHECK: spv.globalVariable +// CHECK-SAME: built_in("LocalInvocationId") + +// CHECK: func @single_workgroup_reduction +// CHECK-SAME: (%[[INPUT:.+]]: !spv.ptr{{.+}}, %[[OUTPUT:.+]]: !spv.ptr{{.+}}) + +// CHECK: %[[ZERO:.+]] = spv.constant 0 : i32 +// CHECK: %[[ID:.+]] = spv.Load "Input" %{{.+}} : vector<3xi32> +// CHECK: %[[X:.+]] = spv.CompositeExtract %[[ID]][0 : i32] + +// CHECK: %[[INPTR:.+]] = spv.AccessChain %[[INPUT]][%[[ZERO]], %[[X]]] +// CHECK: %[[VAL:.+]] = spv.Load "StorageBuffer" %[[INPTR]] : i32 +// CHECK: %[[ADD:.+]] = spv.GroupNonUniformIAdd "Subgroup" "Reduce" %[[VAL]] : i32 + +// CHECK: %[[OUTPTR:.+]] = spv.AccessChain %[[OUTPUT]][%[[ZERO]], %[[ZERO]]] +// CHECK: %[[ELECT:.+]] = spv.GroupNonUniformElect "Subgroup" : i1 + +// CHECK: spv.selection { +// CHECK: spv.BranchConditional %[[ELECT]], ^bb1, ^bb2 +// CHECK: ^bb1: +// CHECK: spv.AtomicIAdd "Device" "AcquireRelease" %[[OUTPTR]], %[[ADD]] +// CHECK: spv.Branch ^bb2 +// CHECK: ^bb2: +// CHECK: spv._merge +// CHECK: } +// CHECK: spv.Return + +func @single_workgroup_reduction(%input: memref<16xi32>, %output: memref<1xi32>) attributes { + spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>} +} { + linalg.generic #single_workgroup_reduction_trait %input, %output { + ^bb(%in: i32, %out: i32): + %sum = addi %in, %out : i32 + linalg.yield %sum : i32 + } : memref<16xi32>, memref<1xi32> + spv.Return +} +} + +// ----- + +// Missing shader entry point ABI + +#single_workgroup_reduction_trait = { + args_in = 1, + args_out = 1, + iterator_types = ["reduction"], + indexing_maps = [ + affine_map<(i) -> (i)>, + affine_map<(i) -> (0)> + ] +} + +module attributes { + spv.target_env = { + version = 3 : i32, + extensions = [], + capabilities = [1: i32, 63: i32] // Shader, 
GroupNonUniformArithmetic + } +} { +func @single_workgroup_reduction(%input: memref<16xi32>, %output: memref<1xi32>) { + // expected-error @+1 {{failed to legalize operation 'linalg.generic'}} + linalg.generic #single_workgroup_reduction_trait %input, %output { + ^bb(%in: i32, %out: i32): + %sum = addi %in, %out : i32 + linalg.yield %sum : i32 + } : memref<16xi32>, memref<1xi32> + return +} +} + +// ----- + +// Mismatch between shader entry point ABI and input memref shape + +#single_workgroup_reduction_trait = { + args_in = 1, + args_out = 1, + iterator_types = ["reduction"], + indexing_maps = [ + affine_map<(i) -> (i)>, + affine_map<(i) -> (0)> + ] +} + +module attributes { + spv.target_env = { + version = 3 : i32, + extensions = [], + capabilities = [1: i32, 63: i32] // Shader, GroupNonUniformArithmetic + } +} { +func @single_workgroup_reduction(%input: memref<16xi32>, %output: memref<1xi32>) attributes { + spv.entry_point_abi = {local_size = dense<[32, 1, 1]>: vector<3xi32>} +} { + // expected-error @+1 {{failed to legalize operation 'linalg.generic'}} + linalg.generic #single_workgroup_reduction_trait %input, %output { + ^bb(%in: i32, %out: i32): + %sum = addi %in, %out : i32 + linalg.yield %sum : i32 + } : memref<16xi32>, memref<1xi32> + spv.Return +} +} + +// ----- + +// Unsupported multi-dimension input memref + +#single_workgroup_reduction_trait = { + args_in = 1, + args_out = 1, + iterator_types = ["parallel", "reduction"], + indexing_maps = [ + affine_map<(i, j) -> (i, j)>, + affine_map<(i, j) -> (i)> + ] +} + +module attributes { + spv.target_env = { + version = 3 : i32, + extensions = [], + capabilities = [1: i32, 63: i32] // Shader, GroupNonUniformArithmetic + } +} { +func @single_workgroup_reduction(%input: memref<16x8xi32>, %output: memref<16xi32>) attributes { + spv.entry_point_abi = {local_size = dense<[16, 8, 1]>: vector<3xi32>} +} { + // expected-error @+1 {{failed to legalize operation 'linalg.generic'}} + linalg.generic 
#single_workgroup_reduction_trait %input, %output { + ^bb(%in: i32, %out: i32): + %sum = addi %in, %out : i32 + linalg.yield %sum : i32 + } : memref<16x8xi32>, memref<16xi32> + spv.Return +} +} diff --git a/mlir/tools/mlir-opt/CMakeLists.txt b/mlir/tools/mlir-opt/CMakeLists.txt --- a/mlir/tools/mlir-opt/CMakeLists.txt +++ b/mlir/tools/mlir-opt/CMakeLists.txt @@ -40,6 +40,7 @@ MLIRQuantOps MLIRROCDLIR MLIRSPIRV + MLIRLinalgToSPIRVTransforms MLIRStandardToSPIRVTransforms MLIRSPIRVTestPasses MLIRSPIRVTransforms