diff --git a/clang-tools-extra/clangd/refactor/Rename.cpp b/clang-tools-extra/clangd/refactor/Rename.cpp --- a/clang-tools-extra/clangd/refactor/Rename.cpp +++ b/clang-tools-extra/clangd/refactor/Rename.cpp @@ -515,7 +515,8 @@ else { // Name conflict detection. // Function conflicts are subtle (overloading), so ignore them. - if (RenameDecl.getKind() != Decl::Function) { + if (RenameDecl.getKind() != Decl::Function && + RenameDecl.getKind() != Decl::CXXMethod) { if (auto *Conflict = lookupSiblingWithName(ASTCtx, RenameDecl, NewName)) Result = InvalidName{ InvalidName::Conflict, diff --git a/clang-tools-extra/clangd/unittests/RenameTests.cpp b/clang-tools-extra/clangd/unittests/RenameTests.cpp --- a/clang-tools-extra/clangd/unittests/RenameTests.cpp +++ b/clang-tools-extra/clangd/unittests/RenameTests.cpp @@ -1062,6 +1062,19 @@ )cpp", "conflict", !HeaderFile, "Conflict"}, + {R"cpp( + void func(int); + void [[o^therFunc]](double); + )cpp", + nullptr, !HeaderFile, "func"}, + {R"cpp( + struct S { + void func(int); + void [[o^therFunc]](double); + }; + )cpp", + nullptr, !HeaderFile, "func"}, + {R"cpp( int V^ar; )cpp", @@ -1121,9 +1134,7 @@ } else { EXPECT_TRUE(bool(Results)) << "rename returned an error: " << llvm::toString(Results.takeError()); - ASSERT_EQ(1u, Results->GlobalChanges.size()); - EXPECT_EQ(applyEdits(std::move(Results->GlobalChanges)).front().second, - expectedResult(T, NewName)); + EXPECT_EQ(Results->LocalChanges, T.ranges()); } } } diff --git a/clang/docs/ControlFlowIntegrity.rst b/clang/docs/ControlFlowIntegrity.rst --- a/clang/docs/ControlFlowIntegrity.rst +++ b/clang/docs/ControlFlowIntegrity.rst @@ -314,10 +314,8 @@ is a security hardening mechanism designed to be deployed in release builds. ``-fsanitize=function`` has a higher space and time overhead due to a more -complex type check at indirect call sites, as well as a need for run-time -type information (RTTI), which may make it unsuitable for deployment. 
Because -of the need for RTTI, ``-fsanitize=function`` can only be used with C++ -programs, whereas ``-fsanitize=cfi-icall`` can protect both C and C++ programs. +complex type check at indirect call sites, which may make it unsuitable for +deployment. On the other hand, ``-fsanitize=function`` conforms more closely with the C++ standard and user expectations around interaction with shared libraries; diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -1373,37 +1373,38 @@ Language Extensions Back-ported to Previous Standards ===================================================== -=================================== ================================ ============= ============= ================================== -Feature Feature Test Macro Introduced In Backported To Required Flags -=================================== ================================ ============= ============= ================================== -variadic templates __cpp_variadic_templates C++11 C++03 -Alias templates __cpp_alias_templates C++11 C++03 -Non-static data member initializers __cpp_nsdmi C++11 C++03 -Range-based ``for`` loop __cpp_range_based_for C++11 C++03 -RValue references __cpp_rvalue_references C++11 C++03 -Attributes __cpp_attributes C++11 C++03 -fdouble-square-bracket-attributes -variable templates __cpp_variable_templates C++14 C++03 -Binary literals __cpp_binary_literals C++14 C++03 -Relaxed constexpr __cpp_constexpr C++14 C++11 -``if constexpr`` __cpp_if_constexpr C++17 C++11 -fold expressions __cpp_fold_expressions C++17 C++03 -Lambda capture of \*this by value __cpp_capture_star_this C++17 C++11 -Attributes on enums __cpp_enumerator_attributes C++17 C++11 -Guaranteed copy elision __cpp_guaranteed_copy_elision C++17 C++03 -Hexadecimal floating literals __cpp_hex_float C++17 C++03 -``inline`` variables __cpp_inline_variables C++17 C++03 -Attributes on namespaces 
__cpp_namespace_attributes C++17 C++11 -Structured bindings __cpp_structured_bindings C++17 C++03 -template template arguments __cpp_template_template_args C++17 C++03 -``static operator[]`` __cpp_multidimensional_subscript C++20 C++03 -Designated initializers __cpp_designated_initializers C++20 C++03 -Conditional ``explicit`` __cpp_conditional_explicit C++20 C++03 -``using enum`` __cpp_using_enum C++20 C++03 -``if consteval`` __cpp_if_consteval C++23 C++20 -``static operator()`` __cpp_static_call_operator C++23 C++03 ------------------------------------ -------------------------------- ------------- ------------- ---------------------------------- -Designated initializers C99 C89 -=================================== ================================ ============= ============= ================================== +====================================== ================================ ============= ============= ================================== +Feature Feature Test Macro Introduced In Backported To Required Flags +====================================== ================================ ============= ============= ================================== +variadic templates __cpp_variadic_templates C++11 C++03 +Alias templates __cpp_alias_templates C++11 C++03 +Non-static data member initializers __cpp_nsdmi C++11 C++03 +Range-based ``for`` loop __cpp_range_based_for C++11 C++03 +RValue references __cpp_rvalue_references C++11 C++03 +Attributes __cpp_attributes C++11 C++03 -fdouble-square-bracket-attributes +variable templates __cpp_variable_templates C++14 C++03 +Binary literals __cpp_binary_literals C++14 C++03 +Relaxed constexpr __cpp_constexpr C++14 C++11 +``if constexpr`` __cpp_if_constexpr C++17 C++11 +fold expressions __cpp_fold_expressions C++17 C++03 +Lambda capture of \*this by value __cpp_capture_star_this C++17 C++11 +Attributes on enums __cpp_enumerator_attributes C++17 C++11 +Guaranteed copy elision __cpp_guaranteed_copy_elision C++17 C++03 +Hexadecimal 
floating literals __cpp_hex_float C++17 C++03 +``inline`` variables __cpp_inline_variables C++17 C++03 +Attributes on namespaces __cpp_namespace_attributes C++17 C++11 +Structured bindings __cpp_structured_bindings C++17 C++03 +template template arguments __cpp_template_template_args C++17 C++03 +``static operator[]`` __cpp_multidimensional_subscript C++20 C++03 +Designated initializers __cpp_designated_initializers C++20 C++03 +Conditional ``explicit`` __cpp_conditional_explicit C++20 C++03 +``using enum`` __cpp_using_enum C++20 C++03 +``if consteval`` __cpp_if_consteval C++23 C++20 +``static operator()`` __cpp_static_call_operator C++23 C++03 +-------------------------------------- -------------------------------- ------------- ------------- ---------------------------------- +Designated initializers (N494) C99 C89 +Array & element qualification (N2607) C2x C89 +====================================== ================================ ============= ============= ================================== Type Trait Primitives ===================== diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -418,6 +418,10 @@ - Propagate the value-dependent bit for VAArgExpr. Fixes a crash where a __builtin_va_arg call has invalid arguments. (`#62711 `_). +- Fix crash on attempt to initialize union with flexible array member. + (`#61746 `_). +- Clang `TextNodeDumper` enabled through `-ast-dump` flag no longer evaluates the + initializer of constexpr `VarDecl` if the declaration has a dependent type. 
Bug Fixes to Compiler Builtins ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/docs/UndefinedBehaviorSanitizer.rst b/clang/docs/UndefinedBehaviorSanitizer.rst --- a/clang/docs/UndefinedBehaviorSanitizer.rst +++ b/clang/docs/UndefinedBehaviorSanitizer.rst @@ -100,7 +100,7 @@ by Clang (and by ISO/IEC/IEEE 60559 / IEEE 754) as producing either an infinity or NaN value, so is not included in ``-fsanitize=undefined``. - ``-fsanitize=function``: Indirect call of a function through a - function pointer of the wrong type (C++ only). + function pointer of the wrong type. - ``-fsanitize=implicit-unsigned-integer-truncation``, ``-fsanitize=implicit-signed-integer-truncation``: Implicit conversion from integer of larger bit width to smaller bit width, if that results in data diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -1472,9 +1472,12 @@ /// Return the unique reference to a scalable vector type of the specified /// element type and scalable number of elements. + /// For RISC-V, the number of fields is also provided when fetching a + /// tuple type. /// /// \pre \p EltTy must be a built-in type. - QualType getScalableVectorType(QualType EltTy, unsigned NumElts) const; + QualType getScalableVectorType(QualType EltTy, unsigned NumElts, + unsigned NumFields = 1) const; /// Return a WebAssembly externref type. 
QualType getWebAssemblyExternrefType() const; diff --git a/clang/include/clang/Basic/RISCVVTypes.def b/clang/include/clang/Basic/RISCVVTypes.def --- a/clang/include/clang/Basic/RISCVVTypes.def +++ b/clang/include/clang/Basic/RISCVVTypes.def @@ -144,6 +144,10 @@ RVV_PREDICATE_TYPE("__rvv_bool32_t", RvvBool32, RvvBool32Ty, 2) RVV_PREDICATE_TYPE("__rvv_bool64_t", RvvBool64, RvvBool64Ty, 1) +//===- Tuple vector types -------------------------------------------------===// + +RVV_VECTOR_TYPE_INT("__rvv_int32m1x2_t", RvvInt32m1x2, RvvInt32m1x2Ty, 2, 32, 2, true) + #undef RVV_VECTOR_TYPE_FLOAT #undef RVV_VECTOR_TYPE_INT #undef RVV_VECTOR_TYPE diff --git a/clang/include/clang/Basic/riscv_vector.td b/clang/include/clang/Basic/riscv_vector.td --- a/clang/include/clang/Basic/riscv_vector.td +++ b/clang/include/clang/Basic/riscv_vector.td @@ -1503,6 +1503,368 @@ defm : RVVIndexedSegLoad<"vluxseg">; defm : RVVIndexedSegLoad<"vloxseg">; } + +multiclass RVVUnitStridedSegLoadTuple { + foreach type = ["i"] in { + defvar eew = !cond(!eq(type, "i") : "32"); + foreach nf = [2] in { + let Name = op # nf # "e" # eew # "_v_tuple", + OverloadedName = op # nf # "e" # eew # "_tuple", + IRName = op # nf, + MaskedIRName = op # nf # "_mask", + NF = nf, + ManualCodegen = [{ + { + assert(((IsMasked && (PolicyAttrs & RVV_VTA) && (PolicyAttrs & RVV_VMA)) || + (!IsMasked && (PolicyAttrs & RVV_VTA))) && + "FIXME: Only handling default policy (TAMA) for now"); + + llvm::Type *ElementVectorType = cast(ResultType)->elements()[0]; + IntrinsicTypes = {ElementVectorType, Ops.back()->getType()}; + SmallVector Operands; + + Operands.append(NF, llvm::PoisonValue::get(ElementVectorType)); + + unsigned Offset = IsMasked ? 
1 : 0; + Operands.push_back(Ops[Offset]); // Ptr + if (IsMasked) + Operands.push_back(Ops[0]); + Operands.push_back(Ops[Offset + 1]); // VL + if (IsMasked) + Operands.push_back(ConstantInt::get(Ops.back()->getType(), PolicyAttrs)); + + llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes); + + llvm::Value *LoadValue = Builder.CreateCall(F, Operands, ""); + if (ReturnValue.isNull()) + return LoadValue; + else + return Builder.CreateStore(LoadValue, ReturnValue.getValue()); + } + }] in { + defvar T = "(Tuple:" # nf # ")"; + def : RVVBuiltin<"v", T # "vPCe", type>; + } + } + } +} + +multiclass RVVUnitStridedSegStoreTuple { + foreach type = ["i"] in { + defvar eew = !cond(!eq(type, "i") : "32"); + foreach nf = [2] in { + let Name = op # nf # "e" # eew # "_v_tuple", + OverloadedName = op # nf # "e" # eew # "_tuple", + IRName = op # nf, + MaskedIRName = op # nf # "_mask", + NF = nf, + HasMaskedOffOperand = false, + ManualCodegen = [{ + { + // Masked + // Builtin: (mask, ptr, v_tuple, vl) + // Intrinsic: (val0, val1, ..., ptr, mask, vl) + // Unmasked + // Builtin: (ptr, v_tuple, vl) + // Intrinsic: (val0, val1, ..., ptr, vl) + unsigned Offset = IsMasked ? 
1 : 0; + llvm::Value *VTupleOperand = Ops[Offset + 1]; + + SmallVector Operands; + for (unsigned I = 0; I < NF; ++I) { + llvm::Value *V = Builder.CreateExtractValue(VTupleOperand, {I}); + Operands.push_back(V); + } + Operands.push_back(Ops[Offset]); // Ptr + if (IsMasked) + Operands.push_back(Ops[0]); + Operands.push_back(Ops[Offset + 2]); // VL + + IntrinsicTypes = {Operands[0]->getType(), Operands.back()->getType()}; + llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes); + return Builder.CreateCall(F, Operands, ""); + } + }] in { + defvar T = "(Tuple:" # nf # ")"; + def : RVVBuiltin<"v", "0Pe" # T # "v", type>; + } + } + } +} + +multiclass RVVUnitStridedSegLoadFFTuple { + foreach type = ["i"] in { + defvar eew = !cond(!eq(type, "i") : "32"); + foreach nf = [2] in { + let Name = op # nf # "e" # eew # "ff_v_tuple", + OverloadedName = op # nf # "e" # eew # "ff_tuple", + IRName = op # nf # "ff", + MaskedIRName = op # nf # "ff_mask", + NF = nf, + ManualCodegen = [{ + { + assert(((IsMasked && (PolicyAttrs & RVV_VTA) && (PolicyAttrs & RVV_VMA)) || + (!IsMasked && (PolicyAttrs & RVV_VTA))) && + "FIXME: Only handling default policy (TAMA) for now"); + + llvm::Type *ElementVectorType = cast(ResultType)->elements()[0]; + + IntrinsicTypes = {ElementVectorType, Ops.back()->getType()}; + SmallVector Operands; + + Operands.append(NF, llvm::PoisonValue::get(ElementVectorType)); + + unsigned Offset = IsMasked ? 
1 : 0; + Operands.push_back(Ops[Offset]); // Ptr + if (IsMasked) + Operands.push_back(Ops[0]); + Operands.push_back(Ops[Offset + 2]); // vl + if (IsMasked) + Operands.push_back(ConstantInt::get(Ops.back()->getType(), PolicyAttrs)); + + llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes); + + llvm::Value *LoadValue = Builder.CreateCall(F, Operands, ""); + // Get alignment from the new vl operand + clang::CharUnits Align = + CGM.getNaturalPointeeTypeAlignment(E->getArg(Offset + 1)->getType()); + + llvm::Value *ReturnTuple = llvm::PoisonValue::get(ResultType); + for (unsigned I = 0; I < NF; ++I) { + llvm::Value *V = Builder.CreateExtractValue(LoadValue, {I}); + ReturnTuple = Builder.CreateInsertValue(ReturnTuple, V, {I}); + } + + // Store new_vl + llvm::Value *V = Builder.CreateExtractValue(LoadValue, {NF}); + Builder.CreateStore(V, Address(Ops[Offset + 1], V->getType(), Align)); + + if (ReturnValue.isNull()) + return ReturnTuple; + else + return Builder.CreateStore(ReturnTuple, ReturnValue.getValue()); + } + }] in { + defvar T = "(Tuple:" # nf # ")"; + def : RVVBuiltin<"v", T # "vPCePz", type>; + } + } + } +} + +multiclass RVVStridedSegLoadTuple { + foreach type = ["i"] in { + defvar eew = !cond(!eq(type, "i") : "32"); + foreach nf = [2] in { + let Name = op # nf # "e" # eew # "_v_tuple", + OverloadedName = op # nf # "e" # eew # "_tuple", + IRName = op # nf, + MaskedIRName = op # nf # "_mask", + NF = nf, + ManualCodegen = [{ + { + assert(((IsMasked && (PolicyAttrs & RVV_VTA) && (PolicyAttrs & RVV_VMA)) || + (!IsMasked && (PolicyAttrs & RVV_VTA))) && + "FIXME: Only handling default policy (TAMA) for now"); + + llvm::Type *ElementVectorType = cast(ResultType)->elements()[0]; + + IntrinsicTypes = {ElementVectorType, Ops.back()->getType()}; + SmallVector Operands; + + Operands.append(NF, llvm::PoisonValue::get(ElementVectorType)); + + unsigned Offset = IsMasked ? 
1 : 0; + Operands.push_back(Ops[Offset]); // Ptr + Operands.push_back(Ops[Offset + 1]); // Stride + if (IsMasked) + Operands.push_back(Ops[0]); + Operands.push_back(Ops[Offset + 2]); // VL + if (IsMasked) + Operands.push_back(ConstantInt::get(Ops.back()->getType(), PolicyAttrs)); + + llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes); + llvm::Value *LoadValue = Builder.CreateCall(F, Operands, ""); + + if (ReturnValue.isNull()) + return LoadValue; + else + return Builder.CreateStore(LoadValue, ReturnValue.getValue()); + } + }] in { + defvar T = "(Tuple:" # nf # ")"; + def : RVVBuiltin<"v", T # "vPCet", type>; + } + } + } +} + +multiclass RVVStridedSegStoreTuple { + foreach type = ["i"] in { + defvar eew = !cond(!eq(type, "i") : "32"); + foreach nf = [2] in { + let Name = op # nf # "e" # eew # "_v_tuple", + OverloadedName = op # nf # "e" # eew # "_tuple", + IRName = op # nf, + MaskedIRName = op # nf # "_mask", + NF = nf, + HasMaskedOffOperand = false, + MaskedPolicyScheme = NonePolicy, + ManualCodegen = [{ + { + // Masked + // Builtin: (mask, ptr, stride, v_tuple, vl) + // Intrinsic: (val0, val1, ..., ptr, stride, mask, vl) + // Unmasked + // Builtin: (ptr, stride, v_tuple, vl) + // Intrinsic: (val0, val1, ..., ptr, stride, vl) + unsigned Offset = IsMasked ? 
1 : 0; + llvm::Value *VTupleOperand = Ops[Offset + 2]; + + SmallVector Operands; + for (unsigned I = 0; I < NF; ++I) { + llvm::Value *V = Builder.CreateExtractValue(VTupleOperand, {I}); + Operands.push_back(V); + } + Operands.push_back(Ops[Offset]); // Ptr + Operands.push_back(Ops[Offset + 1]); // Stride + if (IsMasked) + Operands.push_back(Ops[0]); + Operands.push_back(Ops[Offset + 3]); // VL + + IntrinsicTypes = {Operands[0]->getType(), Operands.back()->getType()}; + llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes); + return Builder.CreateCall(F, Operands, ""); + } + }] in { + defvar T = "(Tuple:" # nf # ")"; + def : RVVBuiltin<"v", "0Pet" # T # "v", type>; + } + } + } +} + +multiclass RVVIndexedSegLoadTuple { + foreach type = ["i"] in { + foreach eew_info = [["32", "(Log2EEW:5)"]] in { + defvar eew = eew_info[0]; + defvar eew_type = eew_info[1]; + foreach nf = [2] in { + let Name = op # nf # "ei" # eew # "_v_tuple", + OverloadedName = op # nf # "ei" # eew # "_tuple", + IRName = op # nf, + MaskedIRName = op # nf # "_mask", + NF = nf, + ManualCodegen = [{ + { + assert(((IsMasked && (PolicyAttrs & RVV_VTA) && (PolicyAttrs & RVV_VMA)) || + (!IsMasked && (PolicyAttrs & RVV_VTA))) && + "FIXME: Only handling default policy (TAMA) for now"); + + llvm::Type *ElementVectorType = cast(ResultType)->elements()[0]; + + SmallVector Operands; + + Operands.append(NF, llvm::PoisonValue::get(ElementVectorType)); + + unsigned Offset = IsMasked ? 
1 : 0; + Operands.push_back(Ops[Offset]); // Ptr + Operands.push_back(Ops[Offset + 1]); // Idx + if (IsMasked) + Operands.push_back(Ops[0]); + Operands.push_back(Ops[Offset + 2]); // VL + if (IsMasked) + Operands.push_back(ConstantInt::get(Ops.back()->getType(), PolicyAttrs)); + + IntrinsicTypes = {ElementVectorType, Ops[Offset + 1]->getType(), + Ops.back()->getType()}; + llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes); + llvm::Value *LoadValue = Builder.CreateCall(F, Operands, ""); + + if (ReturnValue.isNull()) + return LoadValue; + else + return Builder.CreateStore(LoadValue, ReturnValue.getValue()); + } + }] in { + defvar T = "(Tuple:" # nf # ")"; + def : RVVBuiltin<"v", T # "vPCe" # eew_type # "Uv", type>; + } + } + } + } +} + +multiclass RVVIndexedSegStoreTuple { + foreach type = ["i"] in { + foreach eew_info = [["32", "(Log2EEW:5)"]] in { + defvar eew = eew_info[0]; + defvar eew_type = eew_info[1]; + foreach nf = [2] in { + let Name = op # nf # "ei" # eew # "_v_tuple", + OverloadedName = op # nf # "ei" # eew # "_tuple", + IRName = op # nf, + MaskedIRName = op # nf # "_mask", + NF = nf, + HasMaskedOffOperand = false, + MaskedPolicyScheme = NonePolicy, + ManualCodegen = [{ + { + // Masked + // Builtin: (mask, ptr, index, v_tuple, vl) + // Intrinsic: (val0, val1, ..., ptr, index, mask, vl) + // Unmasked + // Builtin: (ptr, index, v_tuple, vl) + // Intrinsic: (val0, val1, ..., ptr, index, vl) + unsigned Offset = IsMasked ? 
1 : 0; + llvm::Value *VTupleOperand = Ops[Offset + 2]; + + SmallVector Operands; + for (unsigned I = 0; I < NF; ++I) { + llvm::Value *V = Builder.CreateExtractValue(VTupleOperand, {I}); + Operands.push_back(V); + } + Operands.push_back(Ops[Offset]); // Ptr + Operands.push_back(Ops[Offset + 1]); // Idx + if (IsMasked) + Operands.push_back(Ops[0]); + Operands.push_back(Ops[Offset + 3]); // VL + + IntrinsicTypes = {Operands[0]->getType(), Ops[Offset + 1]->getType(), + Operands.back()->getType()}; + llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes); + return Builder.CreateCall(F, Operands, ""); + } + }] in { + defvar T = "(Tuple:" # nf # ")"; + def : RVVBuiltin<"v", "0Pe" # eew_type # "Uv" # T # "v", type>; + } + } + } + } +} + +// TODO: Extend for policy +let UnMaskedPolicyScheme = NonePolicy, + MaskedPolicyScheme = NonePolicy, + IsTuple = true in { +defm : RVVUnitStridedSegLoadTuple<"vlseg">; +defm : RVVUnitStridedSegLoadFFTuple<"vlseg">; +defm : RVVStridedSegLoadTuple<"vlsseg">; +defm : RVVIndexedSegLoadTuple<"vluxseg">; +defm : RVVIndexedSegLoadTuple<"vloxseg">; +} + +let UnMaskedPolicyScheme = NonePolicy, + MaskedPolicyScheme = NonePolicy, + IsTuple = true in { +defm : RVVUnitStridedSegStoreTuple<"vsseg">; +defm : RVVStridedSegStoreTuple<"vssseg">; +defm : RVVIndexedSegStoreTuple<"vsuxseg">; +defm : RVVIndexedSegStoreTuple<"vsoxseg">; +} + + let UnMaskedPolicyScheme = NonePolicy, MaskedPolicyScheme = NonePolicy in { defm : RVVUnitStridedSegStore<"vsseg">; @@ -2174,6 +2536,11 @@ let Name = "vget_v", MaskedPolicyScheme = NonePolicy, ManualCodegen = [{ { + if (isa(Ops[0]->getType())) // For tuple type + // Extract value from index (operand 1) of vtuple (operand 0) + return Builder.CreateExtractValue( + Ops[0], + {(unsigned)cast(Ops[1])->getZExtValue()}); auto *VecTy = cast(ResultType); auto *OpVecTy = cast(Ops[0]->getType()); // Mask to only valid indices. 
@@ -2191,11 +2558,22 @@ def : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "vvKz", "csilxfd", dst_lmul # "v">; def : RVVBuiltin<"Uv" # dst_lmul # "Uv", dst_lmul # "UvUvKz", "csil", dst_lmul # "Uv">; } + foreach nf = [2] in { + let Log2LMUL = [0] in { + defvar T = "(Tuple:" # nf # ")"; + def : RVVBuiltin; + } + } } let Name = "vset_v", Log2LMUL = [0, 1, 2], MaskedPolicyScheme = NonePolicy, ManualCodegen = [{ { + if (isa(ResultType)) // For tuple type + // Insert value (operand 2) into index (operand 1) of vtuple (operand 0) + return Builder.CreateInsertValue( + Ops[0], Ops[2], + {(unsigned)cast(Ops[1])->getZExtValue()}); auto *ResVecTy = cast(ResultType); auto *VecTy = cast(Ops[2]->getType()); // Mask to only valid indices. @@ -2213,5 +2591,11 @@ def : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "v" # dst_lmul # "vKzv", "csilxfd">; def : RVVBuiltin<"Uv" # dst_lmul # "Uv", dst_lmul # "Uv" # dst_lmul #"UvKzUv", "csil">; } + foreach nf = [2] in { + let Log2LMUL = [0] in { + defvar T = "(Tuple:" # nf # ")"; + def : RVVBuiltin; + } + } } } diff --git a/clang/include/clang/Basic/riscv_vector_common.td b/clang/include/clang/Basic/riscv_vector_common.td --- a/clang/include/clang/Basic/riscv_vector_common.td +++ b/clang/include/clang/Basic/riscv_vector_common.td @@ -231,6 +231,9 @@ // Number of fields for Load/Store Segment instructions. int NF = 1; + + // Set to true if the builtin is associated with tuple types. + bit IsTuple = false; } // This is the code emitted in the header. 
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3494,7 +3494,8 @@ def mwatchos_version_min_EQ : Joined<["-"], "mwatchos-version-min=">, Group; def mwatchos_simulator_version_min_EQ : Joined<["-"], "mwatchos-simulator-version-min=">; def mwatchsimulator_version_min_EQ : Joined<["-"], "mwatchsimulator-version-min=">, Alias; -def march_EQ : Joined<["-"], "march=">, Group, Flags<[CoreOption]>; +def march_EQ : Joined<["-"], "march=">, Group, Flags<[CoreOption]>, + HelpText<"For a list of available architectures for the target use '-mcpu=help'">; def masm_EQ : Joined<["-"], "masm=">, Group, Flags<[NoXarchOption]>; def inline_asm_EQ : Joined<["-"], "inline-asm=">, Group, Flags<[CC1Option]>, Values<"att,intel">, @@ -3518,7 +3519,8 @@ def mguard_EQ : Joined<["-"], "mguard=">, Group, Flags<[NoXarchOption]>, HelpText<"Enable or disable Control Flow Guard checks and guard tables emission">, Values<"none,cf,cf-nochecks">; -def mcpu_EQ : Joined<["-"], "mcpu=">, Group; +def mcpu_EQ : Joined<["-"], "mcpu=">, Group, + HelpText<"For a list of available CPUs for the target use '-mcpu=help'">; def mmcu_EQ : Joined<["-"], "mmcu=">, Group; def msim : Flag<["-"], "msim">, Group; def mdynamic_no_pic : Joined<["-"], "mdynamic-no-pic">, Group; @@ -5830,7 +5832,7 @@ Group, CodeGenOpts<"EnableAssignmentTracking">, NormalizedValuesScope<"CodeGenOptions::AssignmentTrackingOpts">, Values<"disabled,enabled,forced">, NormalizedValues<["Disabled","Enabled","Forced"]>, - MarshallingInfoEnum, "Enabled">; + MarshallingInfoEnum, "Disabled">; } // let Flags = [CC1Option, NoDriverOption] diff --git a/clang/include/clang/Support/RISCVVIntrinsicUtils.h b/clang/include/clang/Support/RISCVVIntrinsicUtils.h --- a/clang/include/clang/Support/RISCVVIntrinsicUtils.h +++ b/clang/include/clang/Support/RISCVVIntrinsicUtils.h @@ -58,6 +58,7 @@ SFixedLog2LMUL1, SFixedLog2LMUL2, 
SFixedLog2LMUL3, + Tuple2, }; // Similar to basic type but used to describe what's kind of type related to @@ -243,6 +244,8 @@ unsigned ElementBitwidth = 0; VScaleVal Scale = 0; bool Valid; + bool IsTuple = false; + unsigned NF = 0; std::string BuiltinStr; std::string ClangBuiltinStr; @@ -293,10 +296,15 @@ } bool isConstant() const { return IsConstant; } bool isPointer() const { return IsPointer; } + bool isTuple() const { return IsTuple; } unsigned getElementBitwidth() const { return ElementBitwidth; } ScalarTypeKind getScalarType() const { return ScalarType; } VScaleVal getScale() const { return Scale; } + unsigned getNF() const { + assert(NF > 1 && NF < 8 && "Only legal NF should be fetched"); + return NF; + } private: // Verify RVV vector type and set Valid. @@ -444,7 +452,7 @@ computeBuiltinTypes(llvm::ArrayRef Prototype, bool IsMasked, bool HasMaskedOffOperand, bool HasVL, unsigned NF, PolicyScheme DefaultScheme, - Policy PolicyAttrs); + Policy PolicyAttrs, bool IsTuple); static llvm::SmallVector getSupportedUnMaskedPolicies(); static llvm::SmallVector @@ -512,6 +520,7 @@ bool HasMaskedOffOperand : 1; bool HasTailPolicy : 1; bool HasMaskPolicy : 1; + bool IsTuple : 1; uint8_t UnMaskedPolicyScheme : 2; uint8_t MaskedPolicyScheme : 2; }; diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -4042,8 +4042,8 @@ /// getScalableVectorType - Return the unique reference to a scalable vector /// type of the specified element type and size. VectorType must be a built-in /// type. 
-QualType ASTContext::getScalableVectorType(QualType EltTy, - unsigned NumElts) const { +QualType ASTContext::getScalableVectorType(QualType EltTy, unsigned NumElts, + unsigned NumFields) const { if (Target->hasAArch64SVETypes()) { uint64_t EltTySize = getTypeSize(EltTy); #define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId, NumEls, ElBits, \ @@ -4067,15 +4067,15 @@ uint64_t EltTySize = getTypeSize(EltTy); #define RVV_VECTOR_TYPE(Name, Id, SingletonId, NumEls, ElBits, NF, IsSigned, \ IsFP) \ - if (!EltTy->isBooleanType() && \ - ((EltTy->hasIntegerRepresentation() && \ - EltTy->hasSignedIntegerRepresentation() == IsSigned) || \ - (EltTy->hasFloatingRepresentation() && IsFP)) && \ - EltTySize == ElBits && NumElts == NumEls) \ - return SingletonId; + if (!EltTy->isBooleanType() && \ + ((EltTy->hasIntegerRepresentation() && \ + EltTy->hasSignedIntegerRepresentation() == IsSigned) || \ + (EltTy->hasFloatingRepresentation() && IsFP)) && \ + EltTySize == ElBits && NumElts == NumEls && NumFields == NF) \ + return SingletonId; #define RVV_PREDICATE_TYPE(Name, Id, SingletonId, NumEls) \ - if (EltTy->isBooleanType() && NumElts == NumEls) \ - return SingletonId; + if (EltTy->isBooleanType() && NumElts == NumEls) \ + return SingletonId; #include "clang/Basic/RISCVVTypes.def" } return QualType(); diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp --- a/clang/lib/AST/TextNodeDumper.cpp +++ b/clang/lib/AST/TextNodeDumper.cpp @@ -1821,7 +1821,8 @@ if (D->hasInit()) { const Expr *E = D->getInit(); // Only dump the value of constexpr VarDecls for now. 
- if (E && !E->isValueDependent() && D->isConstexpr()) { + if (E && !E->isValueDependent() && D->isConstexpr() && + !D->getType()->isDependentType()) { const APValue *Value = D->evaluateValue(); if (Value) AddChild("value", [=] { Visit(*Value, E->getType()); }); diff --git a/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp b/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp --- a/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp +++ b/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp @@ -399,6 +399,18 @@ } } +void transferArrowOpCall(const Expr *UnwrapExpr, const Expr *ObjectExpr, + LatticeTransferState &State) { + if (auto *OptionalVal = + getValueBehindPossiblePointer(*ObjectExpr, State.Env)) { + if (auto *Loc = maybeInitializeOptionalValueMember( + UnwrapExpr->getType()->getPointeeType(), *OptionalVal, State.Env)) { + State.Env.setValueStrict(*UnwrapExpr, + State.Env.create(*Loc)); + } + } +} + void transferMakeOptionalCall(const CallExpr *E, const MatchFinder::MatchResult &, LatticeTransferState &State) { @@ -774,25 +786,22 @@ transferUnwrapCall(E, E->getImplicitObjectArgument(), State); }) - // optional::operator*, optional::operator-> - // FIXME: This does something slightly strange for `operator->`. - // `transferUnwrapCall()` may create a new value of type `T` for the - // `optional`, and it associates that value with `E`. In the case of - // `operator->`, `E` is a pointer. As a result, we associate an - // expression of pointer type with a storage location of non-pointer type - // `T`. This can confound other code that expects expressions of - // pointer type to be associated with `PointerValue`s, such as the - // centrally provided accessors `getImplicitObjectLocation()` and - // `getBaseObjectLocation()`, and this is the reason we need to use our - // own 'maybeSkipPointer()` and `getValueBehindPossiblePointer()` instead - // of these accessors. 
- .CaseOfCFGStmt(valueOperatorCall(std::nullopt), + // optional::operator* + .CaseOfCFGStmt(isOptionalOperatorCallWithName("*"), [](const CallExpr *E, const MatchFinder::MatchResult &, LatticeTransferState &State) { transferUnwrapCall(E, E->getArg(0), State); }) + // optional::operator-> + .CaseOfCFGStmt(isOptionalOperatorCallWithName("->"), + [](const CallExpr *E, + const MatchFinder::MatchResult &, + LatticeTransferState &State) { + transferArrowOpCall(E, E->getArg(0), State); + }) + // optional::has_value .CaseOfCFGStmt( isOptionalMemberCallWithName("has_value"), diff --git a/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/clang/lib/Analysis/FlowSensitive/Transfer.cpp --- a/clang/lib/Analysis/FlowSensitive/Transfer.cpp +++ b/clang/lib/Analysis/FlowSensitive/Transfer.cpp @@ -48,10 +48,8 @@ static BoolValue &evaluateBooleanEquality(const Expr &LHS, const Expr &RHS, Environment &Env) { - if (auto *LHSValue = - dyn_cast_or_null(Env.getValue(LHS, SkipPast::Reference))) - if (auto *RHSValue = - dyn_cast_or_null(Env.getValue(RHS, SkipPast::Reference))) + if (auto *LHSValue = dyn_cast_or_null(Env.getValueStrict(LHS))) + if (auto *RHSValue = dyn_cast_or_null(Env.getValueStrict(RHS))) return Env.makeIff(*LHSValue, *RHSValue); return Env.makeAtomicBoolValue(); @@ -121,9 +119,7 @@ // value, if any unpacking occured. Also, does the lvalue-to-rvalue conversion, // by skipping past the reference. static Value *maybeUnpackLValueExpr(const Expr &E, Environment &Env) { - // FIXME: this is too flexible: it _allows_ a reference, while it should - // _require_ one, since lvalues should always be wrapped in `ReferenceValue`. 
- auto *Loc = Env.getStorageLocation(E, SkipPast::Reference); + auto *Loc = Env.getStorageLocationStrict(E); if (Loc == nullptr) return nullptr; auto *Val = Env.getValue(*Loc); @@ -139,6 +135,29 @@ return &UnpackedVal; } +static void propagateValue(const Expr &From, const Expr &To, Environment &Env) { + if (auto *Val = Env.getValueStrict(From)) + Env.setValueStrict(To, *Val); +} + +static void propagateStorageLocation(const Expr &From, const Expr &To, + Environment &Env) { + if (auto *Loc = Env.getStorageLocationStrict(From)) + Env.setStorageLocationStrict(To, *Loc); +} + +// Forwards the value or storage location of `From` to `To` in cases where +// `From` may be either a glvalue or a prvalue. `To` must be a glvalue iff +// `From` is a glvalue. +static void propagateValueOrStorageLocation(const Expr &From, const Expr &To, + Environment &Env) { + assert(From.isGLValue() == To.isGLValue()); + if (From.isGLValue()) + propagateStorageLocation(From, To, Env); + else + propagateValue(From, To, Env); +} + namespace { class TransferVisitor : public ConstStmtVisitor { @@ -155,13 +174,11 @@ switch (S->getOpcode()) { case BO_Assign: { - auto *LHSLoc = Env.getStorageLocation(*LHS, SkipPast::Reference); + auto *LHSLoc = Env.getStorageLocationStrict(*LHS); if (LHSLoc == nullptr) break; - // No skipping should be necessary, because any lvalues should have - // already been stripped off in evaluating the LValueToRValue cast. 
- auto *RHSVal = Env.getValue(*RHS, SkipPast::None); + auto *RHSVal = Env.getValueStrict(*RHS); if (RHSVal == nullptr) break; @@ -276,7 +293,7 @@ return; } - if (auto *InitExprVal = Env.getValue(*InitExpr, SkipPast::None)) + if (auto *InitExprVal = Env.getValueStrict(*InitExpr)) Env.setValue(Loc, *InitExprVal); if (Env.getValue(Loc) == nullptr) { @@ -443,7 +460,7 @@ } case UO_LNot: { auto *SubExprVal = - dyn_cast_or_null(Env.getValue(*SubExpr, SkipPast::None)); + dyn_cast_or_null(Env.getValueStrict(*SubExpr)); if (SubExprVal == nullptr) break; @@ -653,19 +670,13 @@ const Expr *SubExpr = S->getSubExpr(); assert(SubExpr != nullptr); - auto *SubExprLoc = Env.getStorageLocation(*SubExpr, SkipPast::None); - if (SubExprLoc == nullptr) - return; - - Env.setStorageLocation(*S, *SubExprLoc); + propagateValue(*SubExpr, *S, Env); } } void VisitCXXTemporaryObjectExpr(const CXXTemporaryObjectExpr *S) { - auto &Loc = Env.createStorageLocation(*S); - Env.setStorageLocation(*S, Loc); if (Value *Val = Env.createValue(S->getType())) - Env.setValue(Loc, *Val); + Env.setValueStrict(*S, *Val); } void VisitCallExpr(const CallExpr *S) { @@ -703,22 +714,20 @@ const Expr *SubExpr = S->getSubExpr(); assert(SubExpr != nullptr); - auto *SubExprLoc = Env.getStorageLocation(*SubExpr, SkipPast::None); - if (SubExprLoc == nullptr) + Value *SubExprVal = Env.getValueStrict(*SubExpr); + if (SubExprVal == nullptr) return; - Env.setStorageLocation(*S, *SubExprLoc); + auto &Loc = Env.createStorageLocation(*S); + Env.setStorageLocationStrict(*S, Loc); + Env.setValue(Loc, *SubExprVal); } void VisitCXXBindTemporaryExpr(const CXXBindTemporaryExpr *S) { const Expr *SubExpr = S->getSubExpr(); assert(SubExpr != nullptr); - auto *SubExprLoc = Env.getStorageLocation(*SubExpr, SkipPast::None); - if (SubExprLoc == nullptr) - return; - - Env.setStorageLocation(*S, *SubExprLoc); + propagateValue(*SubExpr, *S, Env); } void VisitCXXStaticCastExpr(const CXXStaticCastExpr *S) { @@ -726,11 +735,7 @@ const Expr *SubExpr 
= S->getSubExpr(); assert(SubExpr != nullptr); - auto *SubExprLoc = Env.getStorageLocation(*SubExpr, SkipPast::None); - if (SubExprLoc == nullptr) - return; - - Env.setStorageLocation(*S, *SubExprLoc); + propagateValueOrStorageLocation(*SubExpr, *S, Env); } } @@ -738,10 +743,14 @@ // FIXME: Revisit this once flow conditions are added to the framework. For // `a = b ? c : d` we can add `b => a == c && !b => a == d` to the flow // condition. - auto &Loc = Env.createStorageLocation(*S); - Env.setStorageLocation(*S, Loc); - if (Value *Val = Env.createValue(S->getType())) - Env.setValue(Loc, *Val); + if (S->isGLValue()) { + auto &Loc = Env.createStorageLocation(*S); + Env.setStorageLocationStrict(*S, Loc); + if (Value *Val = Env.createValue(S->getType())) + Env.setValue(Loc, *Val); + } else if (Value *Val = Env.createValue(S->getType())) { + Env.setValueStrict(*S, *Val); + } } void VisitInitListExpr(const InitListExpr *S) { @@ -780,9 +789,7 @@ } void VisitCXXBoolLiteralExpr(const CXXBoolLiteralExpr *S) { - auto &Loc = Env.createStorageLocation(*S); - Env.setStorageLocation(*S, Loc); - Env.setValue(Loc, Env.getBoolLiteralValue(S->getValue())); + Env.setValueStrict(*S, Env.getBoolLiteralValue(S->getValue())); } void VisitParenExpr(const ParenExpr *S) { @@ -814,11 +821,11 @@ if (!SubExprEnv) return nullptr; - if (auto *Val = dyn_cast_or_null( - SubExprEnv->getValue(SubExpr, SkipPast::Reference))) + if (auto *Val = + dyn_cast_or_null(SubExprEnv->getValueStrict(SubExpr))) return Val; - if (Env.getStorageLocation(SubExpr, SkipPast::None) == nullptr) { + if (Env.getValueStrict(SubExpr) == nullptr) { // Sub-expressions that are logic operators are not added in basic blocks // (e.g. see CFG for `bool d = a && (b || c);`). If `SubExpr` is a logic // operator, it may not have been evaluated and assigned a value yet. 
In @@ -827,8 +834,7 @@ Visit(&SubExpr); } - if (auto *Val = dyn_cast_or_null( - Env.getValue(SubExpr, SkipPast::Reference))) + if (auto *Val = dyn_cast_or_null(Env.getValueStrict(SubExpr))) return Val; // If the value of `SubExpr` is still unknown, we create a fresh symbolic diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp --- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp +++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp @@ -123,13 +123,10 @@ private: TerminatorVisitorRetTy extendFlowCondition(const Expr &Cond) { // The terminator sub-expression might not be evaluated. - if (Env.getStorageLocation(Cond, SkipPast::None) == nullptr) + if (Env.getValueStrict(Cond) == nullptr) transfer(StmtToEnv, Cond, Env); - // FIXME: The flow condition must be an r-value, so `SkipPast::None` should - // suffice. - auto *Val = - cast_or_null(Env.getValue(Cond, SkipPast::Reference)); + auto *Val = cast_or_null(Env.getValueStrict(Cond)); // Value merging depends on flow conditions from different environments // being mutually exclusive -- that is, they cannot both be true in their // entirety (even if they may share some clauses). So, we need *some* value @@ -219,7 +216,8 @@ // operator includes a branch that contains a noreturn destructor call. // // See `NoreturnDestructorTest` for concrete examples. 
- if (Block.succ_begin()->getReachableBlock()->hasNoReturnElement()) { + if (Block.succ_begin()->getReachableBlock() != nullptr && + Block.succ_begin()->getReachableBlock()->hasNoReturnElement()) { auto &StmtToBlock = AC.CFCtx.getStmtToBlock(); auto StmtBlock = StmtToBlock.find(Block.getTerminatorStmt()); assert(StmtBlock != StmtToBlock.end()); @@ -303,18 +301,14 @@ auto *InitStmt = Init->getInit(); assert(InitStmt != nullptr); - auto *InitStmtLoc = Env.getStorageLocation(*InitStmt, SkipPast::Reference); - if (InitStmtLoc == nullptr) - return; - - auto *InitStmtVal = Env.getValue(*InitStmtLoc); - if (InitStmtVal == nullptr) - return; - if (Member->getType()->isReferenceType()) { + auto *InitStmtLoc = Env.getStorageLocationStrict(*InitStmt); + if (InitStmtLoc == nullptr) + return; + auto &MemberLoc = ThisLoc.getChild(*Member); Env.setValue(MemberLoc, Env.create(*InitStmtLoc)); - } else { + } else if (auto *InitStmtVal = Env.getValueStrict(*InitStmt)) { auto &MemberLoc = ThisLoc.getChild(*Member); Env.setValue(MemberLoc, *InitStmtVal); } diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -19793,6 +19793,14 @@ ICEArguments |= (1 << 2); for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { + // Handle aggregate argument, namely RVV tuple types in segment load/store + if (hasAggregateEvaluationKind(E->getArg(i)->getType())) { + LValue L = EmitAggExprToLValue(E->getArg(i)); + llvm::Value *AggValue = Builder.CreateLoad(L.getAddress(*this)); + Ops.push_back(AggValue); + continue; + } + // If this is a normal argument, just emit it as a scalar. 
if ((ICEArguments & (1 << i)) == 0) { Ops.push_back(EmitScalarExpr(E->getArg(i))); diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -3132,30 +3132,51 @@ llvm::StructType *STy = dyn_cast(ArgI.getCoerceToType()); if (ArgI.isDirect() && ArgI.getCanBeFlattened() && STy && STy->getNumElements() > 1) { - uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(STy); - llvm::Type *DstTy = Ptr.getElementType(); - uint64_t DstSize = CGM.getDataLayout().getTypeAllocSize(DstTy); + llvm::TypeSize StructSize = CGM.getDataLayout().getTypeAllocSize(STy); + llvm::TypeSize PtrElementSize = + CGM.getDataLayout().getTypeAllocSize(Ptr.getElementType()); + if (StructSize.isScalable()) { + assert(STy->containsHomogeneousScalableVectorTypes() && + "ABI only supports structure with homogeneous scalable vector " + "type"); + assert(StructSize == PtrElementSize && + "Only allow non-fractional movement of structure with" + "homogeneous scalable vector type"); + assert(STy->getNumElements() == NumIRArgs); + + llvm::Value *LoadedStructValue = llvm::PoisonValue::get(STy); + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + auto *AI = Fn->getArg(FirstIRArg + i); + AI->setName(Arg->getName() + ".coerce" + Twine(i)); + LoadedStructValue = + Builder.CreateInsertValue(LoadedStructValue, AI, i); + } - Address AddrToStoreInto = Address::invalid(); - if (SrcSize <= DstSize) { - AddrToStoreInto = Builder.CreateElementBitCast(Ptr, STy); + Builder.CreateStore(LoadedStructValue, Ptr); } else { - AddrToStoreInto = - CreateTempAlloca(STy, Alloca.getAlignment(), "coerce"); - } + uint64_t SrcSize = StructSize.getFixedValue(); + uint64_t DstSize = PtrElementSize.getFixedValue(); + + Address AddrToStoreInto = Address::invalid(); + if (SrcSize <= DstSize) { + AddrToStoreInto = Builder.CreateElementBitCast(Ptr, STy); + } else { + AddrToStoreInto = + CreateTempAlloca(STy, Alloca.getAlignment(), "coerce"); 
+ } - assert(STy->getNumElements() == NumIRArgs); - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - auto AI = Fn->getArg(FirstIRArg + i); - AI->setName(Arg->getName() + ".coerce" + Twine(i)); - Address EltPtr = Builder.CreateStructGEP(AddrToStoreInto, i); - Builder.CreateStore(AI, EltPtr); - } + assert(STy->getNumElements() == NumIRArgs); + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + auto AI = Fn->getArg(FirstIRArg + i); + AI->setName(Arg->getName() + ".coerce" + Twine(i)); + Address EltPtr = Builder.CreateStructGEP(AddrToStoreInto, i); + Builder.CreateStore(AI, EltPtr); + } - if (SrcSize > DstSize) { - Builder.CreateMemCpy(Ptr, AddrToStoreInto, DstSize); + if (SrcSize > DstSize) { + Builder.CreateMemCpy(Ptr, AddrToStoreInto, DstSize); + } } - } else { // Simple case, just do a coerced store of the argument into the alloca. assert(NumIRArgs == 1); diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -5349,8 +5349,9 @@ CGCallee Callee = OrigCallee; - if (getLangOpts().CPlusPlus && SanOpts.has(SanitizerKind::Function) && - (!TargetDecl || !isa(TargetDecl))) { + if (SanOpts.has(SanitizerKind::Function) && + (!TargetDecl || !isa(TargetDecl)) && + !isa(PointeeType)) { if (llvm::Constant *PrefixSig = CGM.getTargetCodeGenInfo().getUBSanFunctionSignature(CGM)) { SanitizerScope SanScope(this); diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -572,10 +572,11 @@ CodeGenFunction::getUBSanFunctionTypeHash(QualType Ty) const { // Remove any (C++17) exception specifications, to allow calling e.g. a // noexcept function through a non-noexcept pointer. 
- auto ProtoTy = getContext().getFunctionTypeWithExceptionSpec(Ty, EST_None); + if (!isa(Ty)) + Ty = getContext().getFunctionTypeWithExceptionSpec(Ty, EST_None); std::string Mangled; llvm::raw_string_ostream Out(Mangled); - CGM.getCXXABI().getMangleContext().mangleTypeName(ProtoTy, Out, false); + CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out, false); return llvm::ConstantInt::get(CGM.Int32Ty, static_cast(llvm::xxHash64(Mangled))); } @@ -945,7 +946,7 @@ // If we are checking function types, emit a function type signature as // prologue data. - if (FD && getLangOpts().CPlusPlus && SanOpts.has(SanitizerKind::Function)) { + if (FD && SanOpts.has(SanitizerKind::Function)) { if (llvm::Constant *PrologueSig = getPrologueSignature(CGM, FD)) { llvm::LLVMContext &Ctx = Fn->getContext(); llvm::MDBuilder MDB(Ctx); diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -630,13 +630,22 @@ #include "clang/Basic/PPCTypes.def" #define RVV_TYPE(Name, Id, SingletonId) case BuiltinType::Id: #include "clang/Basic/RISCVVTypes.def" - { - ASTContext::BuiltinVectorTypeInfo Info = - Context.getBuiltinVectorTypeInfo(cast(Ty)); - return llvm::ScalableVectorType::get(ConvertType(Info.ElementType), - Info.EC.getKnownMinValue() * - Info.NumVectors); - } + { + ASTContext::BuiltinVectorTypeInfo Info = + Context.getBuiltinVectorTypeInfo(cast(Ty)); + // Tuple types are expressed as aggregregate types of the same scalable + // vector type (e.g. vint32m1x2_t is two vint32m1_t, which is {, }). 
+ if (Info.NumVectors != 1) { + llvm::Type *EltTy = llvm::ScalableVectorType::get( + ConvertType(Info.ElementType), Info.EC.getKnownMinValue()); + llvm::SmallVector EltTys(Info.NumVectors, EltTy); + return llvm::StructType::get(getLLVMContext(), EltTys); + } + return llvm::ScalableVectorType::get(ConvertType(Info.ElementType), + Info.EC.getKnownMinValue() * + Info.NumVectors); + } #define WASM_REF_TYPE(Name, MangledName, Id, SingletonId, AS) \ case BuiltinType::Id: { \ if (BuiltinType::Id == BuiltinType::WasmExternRef) \ diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -4522,9 +4522,12 @@ ASTContext::BuiltinVectorTypeInfo VecInfo = Context.getBuiltinVectorTypeInfo(cast( TheCall->getArg(0)->getType().getCanonicalType().getTypePtr())); - unsigned MaxIndex = - (VecInfo.EC.getKnownMinValue() * VecInfo.NumVectors) / - (ResVecInfo.EC.getKnownMinValue() * ResVecInfo.NumVectors); + unsigned MaxIndex; + if (VecInfo.NumVectors != 1) // vget for tuple type + MaxIndex = VecInfo.NumVectors; + else // vget for non-tuple type + MaxIndex = (VecInfo.EC.getKnownMinValue() * VecInfo.NumVectors) / + (ResVecInfo.EC.getKnownMinValue() * ResVecInfo.NumVectors); return SemaBuiltinConstantArgRange(TheCall, 1, 0, MaxIndex - 1); } case RISCVVector::BI__builtin_rvv_vset_v: { @@ -4534,9 +4537,12 @@ ASTContext::BuiltinVectorTypeInfo VecInfo = Context.getBuiltinVectorTypeInfo(cast( TheCall->getArg(2)->getType().getCanonicalType().getTypePtr())); - unsigned MaxIndex = - (ResVecInfo.EC.getKnownMinValue() * ResVecInfo.NumVectors) / - (VecInfo.EC.getKnownMinValue() * VecInfo.NumVectors); + unsigned MaxIndex; + if (ResVecInfo.NumVectors != 1) // vset for tuple type + MaxIndex = ResVecInfo.NumVectors; + else // vset fo non-tuple type + MaxIndex = (ResVecInfo.EC.getKnownMinValue() * ResVecInfo.NumVectors) / + (VecInfo.EC.getKnownMinValue() * VecInfo.NumVectors); return 
SemaBuiltinConstantArgRange(TheCall, 1, 0, MaxIndex - 1); } case RISCVVector::BI__builtin_rvv_sf_vc_i_se_u8mf8: diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -811,7 +811,7 @@ // order to leave them uninitialized, the ILE is expanded and the extra // fields are then filled with NoInitExpr. unsigned NumElems = numStructUnionElements(ILE->getType()); - if (RDecl->hasFlexibleArrayMember()) + if (!RDecl->isUnion() && RDecl->hasFlexibleArrayMember()) ++NumElems; if (!VerifyOnly && ILE->getNumInits() < NumElems) ILE->resizeInits(SemaRef.Context, NumElems); diff --git a/clang/lib/Sema/SemaRISCVVectorLookup.cpp b/clang/lib/Sema/SemaRISCVVectorLookup.cpp --- a/clang/lib/Sema/SemaRISCVVectorLookup.cpp +++ b/clang/lib/Sema/SemaRISCVVectorLookup.cpp @@ -135,8 +135,12 @@ case Invalid: llvm_unreachable("Unhandled type."); } - if (Type->isVector()) - QT = Context.getScalableVectorType(QT, *Type->getScale()); + if (Type->isVector()) { + if (Type->isTuple()) + QT = Context.getScalableVectorType(QT, *Type->getScale(), Type->getNF()); + else + QT = Context.getScalableVectorType(QT, *Type->getScale()); + } if (Type->isConstant()) QT = Context.getConstType(QT); @@ -214,15 +218,16 @@ const Policy DefaultPolicy; llvm::SmallVector ProtoSeq = - RVVIntrinsic::computeBuiltinTypes(BasicProtoSeq, /*IsMasked=*/false, - /*HasMaskedOffOperand=*/false, - Record.HasVL, Record.NF, - UnMaskedPolicyScheme, DefaultPolicy); + RVVIntrinsic::computeBuiltinTypes( + BasicProtoSeq, /*IsMasked=*/false, + /*HasMaskedOffOperand=*/false, Record.HasVL, Record.NF, + UnMaskedPolicyScheme, DefaultPolicy, Record.IsTuple); llvm::SmallVector ProtoMaskSeq = RVVIntrinsic::computeBuiltinTypes( BasicProtoSeq, /*IsMasked=*/true, Record.HasMaskedOffOperand, - Record.HasVL, Record.NF, MaskedPolicyScheme, DefaultPolicy); + Record.HasVL, Record.NF, MaskedPolicyScheme, DefaultPolicy, + Record.IsTuple); bool UnMaskedHasPolicy = 
UnMaskedPolicyScheme != PolicyScheme::SchemeNone; bool MaskedHasPolicy = MaskedPolicyScheme != PolicyScheme::SchemeNone; @@ -280,7 +285,7 @@ RVVIntrinsic::computeBuiltinTypes( BasicProtoSeq, /*IsMasked=*/false, /*HasMaskedOffOperand=*/false, Record.HasVL, Record.NF, - UnMaskedPolicyScheme, P); + UnMaskedPolicyScheme, P, Record.IsTuple); std::optional PolicyTypes = TypeCache.computeTypes( BaseType, Log2LMUL, Record.NF, PolicyPrototype); InitRVVIntrinsic(Record, SuffixStr, OverloadedSuffixStr, @@ -301,8 +306,9 @@ for (auto P : SupportedMaskedPolicies) { llvm::SmallVector PolicyPrototype = RVVIntrinsic::computeBuiltinTypes( - BasicProtoSeq, /*IsMasked=*/true, Record.HasMaskedOffOperand, - Record.HasVL, Record.NF, MaskedPolicyScheme, P); + BasicProtoSeq, /*IsMasked=*/true, + Record.HasMaskedOffOperand, Record.HasVL, Record.NF, + MaskedPolicyScheme, P, Record.IsTuple); std::optional PolicyTypes = TypeCache.computeTypes( BaseType, Log2LMUL, Record.NF, PolicyPrototype); InitRVVIntrinsic(Record, SuffixStr, OverloadedSuffixStr, diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -400,8 +400,10 @@ Clobbers.push_back(cast_or_null(Record.readSubStmt())); // Labels - for (unsigned I = 0, N = NumLabels; I != N; ++I) + for (unsigned I = 0, N = NumLabels; I != N; ++I) { + Names.push_back(Record.readIdentifier()); Exprs.push_back(Record.readSubStmt()); + } S->setOutputsAndInputsAndClobbers(Record.getContext(), Names.data(), Constraints.data(), diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -317,7 +317,10 @@ Record.AddStmt(S->getClobberStringLiteral(I)); // Labels - for (auto *E : S->labels()) Record.AddStmt(E); + for (unsigned I = 0, N = S->getNumLabels(); I != N; ++I) { + 
Record.AddIdentifierRef(S->getLabelIdentifier(I)); + Record.AddStmt(S->getLabelExpr(I)); + } Code = serialization::STMT_GCCASM; } diff --git a/clang/lib/Support/RISCVVIntrinsicUtils.cpp b/clang/lib/Support/RISCVVIntrinsicUtils.cpp --- a/clang/lib/Support/RISCVVIntrinsicUtils.cpp +++ b/clang/lib/Support/RISCVVIntrinsicUtils.cpp @@ -113,6 +113,8 @@ return false; if (isFloat() && ElementBitwidth == 8) return false; + if (IsTuple && (NF == 1 || NF > 8)) + return false; unsigned V = *Scale; switch (ElementBitwidth) { case 1: @@ -214,6 +216,9 @@ // vector values. if (IsPointer) BuiltinStr += "*"; + + if (IsTuple) + BuiltinStr = "T" + utostr(NF) + BuiltinStr; } void RVVType::initClangBuiltinStr() { @@ -237,7 +242,8 @@ default: llvm_unreachable("ScalarTypeKind is invalid"); } - ClangBuiltinStr += utostr(ElementBitwidth) + LMUL.str() + "_t"; + ClangBuiltinStr += utostr(ElementBitwidth) + LMUL.str() + + (IsTuple ? "x" + utostr(NF) : "") + "_t"; } void RVVType::initTypeStr() { @@ -249,7 +255,8 @@ auto getTypeString = [&](StringRef TypeStr) { if (isScalar()) return Twine(TypeStr + Twine(ElementBitwidth) + "_t").str(); - return Twine("v" + TypeStr + Twine(ElementBitwidth) + LMUL.str() + "_t") + return Twine("v" + TypeStr + Twine(ElementBitwidth) + LMUL.str() + + (IsTuple ? 
"x" + utostr(NF) : "") + "_t") .str(); }; @@ -325,6 +332,8 @@ } if (isVector()) ShortStr += LMUL.str(); + if (isTuple()) + ShortStr += "x" + utostr(NF); } void RVVType::applyBasicType() { @@ -542,6 +551,19 @@ return std::nullopt; } + } else if (ComplexTT.first == "Tuple") { + unsigned NF = 0; + if (ComplexTT.second.getAsInteger(10, NF)) { + llvm_unreachable("Invalid NF value!"); + return std::nullopt; + } + switch (NF) { + case 2: + VTM = VectorTypeModifier::Tuple2; + break; + default: + llvm_unreachable("Unhandled NF"); + } } else { llvm_unreachable("Illegal complex type transformers!"); } @@ -702,6 +724,11 @@ case VectorTypeModifier::SFixedLog2LMUL3: applyFixedLog2LMUL(3, FixedLMULType::SmallerThan); break; + case VectorTypeModifier::Tuple2: { + IsTuple = true; + NF = 2; + break; + } case VectorTypeModifier::NoModifier: break; } @@ -912,7 +939,7 @@ llvm::SmallVector RVVIntrinsic::computeBuiltinTypes( llvm::ArrayRef Prototype, bool IsMasked, bool HasMaskedOffOperand, bool HasVL, unsigned NF, - PolicyScheme DefaultScheme, Policy PolicyAttrs) { + PolicyScheme DefaultScheme, Policy PolicyAttrs, bool IsTuple) { SmallVector NewPrototype(Prototype.begin(), Prototype.end()); bool HasPassthruOp = DefaultScheme == PolicyScheme::HasPassthruOperand; @@ -938,8 +965,12 @@ // to // (void, op0 address, op1 address, ..., mask, maskedoff0, maskedoff1, // ...) - NewPrototype.insert(NewPrototype.begin() + NF + 1, - PrototypeDescriptor::Mask); + if (IsTuple) + NewPrototype.insert(NewPrototype.begin() + 1, + PrototypeDescriptor::Mask); + else + NewPrototype.insert(NewPrototype.begin() + NF + 1, + PrototypeDescriptor::Mask); } else { // If IsMasked, insert PrototypeDescriptor:Mask as first input operand. 
NewPrototype.insert(NewPrototype.begin() + 1, PrototypeDescriptor::Mask); @@ -963,6 +994,8 @@ // If HasVL, append PrototypeDescriptor:VL to last operand if (HasVL) NewPrototype.push_back(PrototypeDescriptor::VL); + if (IsTuple) + NewPrototype[0].VTM = static_cast(VectorTypeModifier::Tuple2); return NewPrototype; } @@ -1077,6 +1110,7 @@ OS << (int)Record.HasMaskedOffOperand << ","; OS << (int)Record.HasTailPolicy << ","; OS << (int)Record.HasMaskPolicy << ","; + OS << (int)Record.IsTuple << ","; OS << (int)Record.UnMaskedPolicyScheme << ","; OS << (int)Record.MaskedPolicyScheme << ","; OS << "},\n"; diff --git a/clang/test/AST/ast-dump-decl.cpp b/clang/test/AST/ast-dump-decl.cpp --- a/clang/test/AST/ast-dump-decl.cpp +++ b/clang/test/AST/ast-dump-decl.cpp @@ -818,3 +818,38 @@ // CHECK: `-TextComment // CHECK: VarDecl {{.*}} Test 'int' extern // CHECK-NOT: FullComment + +namespace TestConstexprVariableTemplateWithInitializer { + template constexpr T foo{}; + // CHECK: VarTemplateDecl 0x{{.+}} <{{.+}}:[[@LINE-1]]:3, col:40> col:36 foo + // CHECK-NEXT: |-TemplateTypeParmDecl 0x{{.+}} col:21 referenced typename depth 0 index 0 T + // CHECK-NEXT: `-VarDecl 0x{{.+}} col:36 foo 'const T' constexpr listinit + // CHECK-NEXT: `-InitListExpr 0x{{.+}} 'void' + + template constexpr int val{42}; + // CHECK: VarTemplateDecl 0x{{.+}} <{{.+}}:[[@LINE-1]]:3, col:44> col:38 val + // CHECK-NEXT: |-TemplateTypeParmDecl 0x{{.+}} col:21 typename depth 0 index 0 T + // CHECK-NEXT: `-VarDecl 0x{{.+}} col:38 val 'const int' constexpr listinit + // CHECK-NEXT: |-value: Int 42 + // CHECK-NEXT: `-InitListExpr 0x{{.+}} 'int' + + template + struct in_place_type_t { + explicit in_place_type_t() = default; + }; + + template + inline constexpr in_place_type_t<_Tp> in_place_type{}; + // CHECK: -VarTemplateDecl 0x{{.+}} col:41 in_place_type + // CHECK-NEXT: |-TemplateTypeParmDecl 0x{{.+}} col:22 referenced typename depth 0 index 0 _Tp + // CHECK-NEXT: `-VarDecl 0x{{.+}} col:41 in_place_type 'const 
in_place_type_t<_Tp>':'const in_place_type_t<_Tp>' inline constexpr listinit + // CHECK-NEXT: `-InitListExpr 0x{{.+}} 'void' + + template constexpr T call_init(0); + // CHECK: -VarTemplateDecl 0x{{.+}} col:37 call_init + // CHECK-NEXT: |-TemplateTypeParmDecl 0x{{.+}} col:22 referenced typename depth 0 index 0 T + // CHECK-NEXT: `-VarDecl 0x{{.+}} col:37 call_init 'const T' constexpr callinit + // CHECK-NEXT: `-ParenListExpr 0x{{.+}} 'NULL TYPE' + // CHECK-NEXT: `-IntegerLiteral 0x{{.+}} 'int' 0 + +} diff --git a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-zbb.c b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-zbb.c --- a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-zbb.c +++ b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv64-zbb.c @@ -50,6 +50,18 @@ return __builtin_riscv_clz_64(a); } +// RV64ZBB-LABEL: @ctz_32( +// RV64ZBB-NEXT: entry: +// RV64ZBB-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// RV64ZBB-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR]], align 4 +// RV64ZBB-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// RV64ZBB-NEXT: [[TMP1:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP0]], i1 false) +// RV64ZBB-NEXT: ret i32 [[TMP1]] +// +int ctz_32(int a) { + return __builtin_riscv_ctz_32(a); +} + // RV64ZBB-LABEL: @ctz_64( // RV64ZBB-NEXT: entry: // RV64ZBB-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vget_tuple.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vget_tuple.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vget_tuple.c @@ -0,0 +1,20 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S 
-passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local @test_vget_v_i32m1x2_i32m1 +// CHECK-RV64-SAME: ( [[SRC_COERCE0:%.*]], [[SRC_COERCE1:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[SRC_COERCE0]], 0 +// CHECK-RV64-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[SRC_COERCE1]], 1 +// CHECK-RV64-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-RV64-NEXT: ret [[TMP2]] +// +vint32m1_t test_vget_v_i32m1x2_i32m1(vint32m1x2_t src) { + return __riscv_vget_v_i32m1x2_i32m1(src, 0); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vloxseg2ei32_tuple.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vloxseg2ei32_tuple.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vloxseg2ei32_tuple.c @@ -0,0 +1,28 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local { , } @test_vloxseg2ei32_v_tuple_i32m1 +// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], [[BINDEX:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { , } @llvm.riscv.vloxseg2.nxv2i32.nxv2i32.i64( poison, poison, ptr [[BASE]], [[BINDEX]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret { , } [[TMP0]] +// +vint32m1x2_t test_vloxseg2ei32_v_tuple_i32m1(const int32_t *base, vuint32m1_t bindex, size_t vl) { + return __riscv_vloxseg2ei32_v_tuple_i32m1(base, bindex, vl); +} + +// 
CHECK-RV64-LABEL: define dso_local { , } @test_vloxseg2ei32_v_tuple_i32m1_m +// CHECK-RV64-SAME: ( [[MASK:%.*]], ptr noundef [[BASE:%.*]], [[BINDEX:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { , } @llvm.riscv.vloxseg2.mask.nxv2i32.nxv2i32.i64( poison, poison, ptr [[BASE]], [[BINDEX]], [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret { , } [[TMP0]] +// +vint32m1x2_t test_vloxseg2ei32_v_tuple_i32m1_m(vbool32_t mask, const int32_t *base, vuint32m1_t bindex, size_t vl) { + return __riscv_vloxseg2ei32_v_tuple_i32m1_m(mask, base, bindex, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vlseg2e32_tuple.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vlseg2e32_tuple.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vlseg2e32_tuple.c @@ -0,0 +1,27 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s +#include + +// CHECK-RV64-LABEL: define dso_local { , } @test_vlseg2e32_v_tuple_i32m1 +// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { , } @llvm.riscv.vlseg2.nxv2i32.i64( poison, poison, ptr [[BASE]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret { , } [[TMP0]] +// +vint32m1x2_t test_vlseg2e32_v_tuple_i32m1(const int32_t *base, size_t vl) { + return __riscv_vlseg2e32_v_tuple_i32m1(base, vl); +} + +// CHECK-RV64-LABEL: define dso_local { , } @test_vlseg2e32_v_tuple_i32m1_m +// CHECK-RV64-SAME: ( [[MASK:%.*]], ptr 
noundef [[BASE:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { , } @llvm.riscv.vlseg2.mask.nxv2i32.i64( poison, poison, ptr [[BASE]], [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret { , } [[TMP0]] +// +vint32m1x2_t test_vlseg2e32_v_tuple_i32m1_m(vbool32_t mask, const int32_t *base, size_t vl) { + return __riscv_vlseg2e32_v_tuple_i32m1_m(mask, base, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vlseg2e32ff_tuple.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vlseg2e32ff_tuple.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vlseg2e32ff_tuple.c @@ -0,0 +1,39 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s +#include + +// CHECK-RV64-LABEL: define dso_local { , } @test_vlseg2e32ff_v_tuple_i32m1 +// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], ptr noundef [[NEW_VL:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { , , i64 } @llvm.riscv.vlseg2ff.nxv2i32.i64( poison, poison, ptr [[BASE]], i64 [[VL]]) +// CHECK-RV64-NEXT: [[TMP1:%.*]] = extractvalue { , , i64 } [[TMP0]], 0 +// CHECK-RV64-NEXT: [[TMP2:%.*]] = insertvalue { , } poison, [[TMP1]], 0 +// CHECK-RV64-NEXT: [[TMP3:%.*]] = extractvalue { , , i64 } [[TMP0]], 1 +// CHECK-RV64-NEXT: [[TMP4:%.*]] = insertvalue { , } [[TMP2]], [[TMP3]], 1 +// CHECK-RV64-NEXT: [[TMP5:%.*]] = extractvalue { , , i64 } [[TMP0]], 2 +// CHECK-RV64-NEXT: store i64 [[TMP5]], ptr [[NEW_VL]], align 8 +// 
CHECK-RV64-NEXT: ret { , } [[TMP4]] +// +vint32m1x2_t test_vlseg2e32ff_v_tuple_i32m1(const int32_t *base, size_t *new_vl, size_t vl) { + return __riscv_vlseg2e32ff_v_tuple_i32m1(base, new_vl, vl); +} + +// CHECK-RV64-LABEL: define dso_local { , } @test_vlseg2e32ff_v_tuple_i32m1_m +// CHECK-RV64-SAME: ( [[MASK:%.*]], ptr noundef [[BASE:%.*]], ptr noundef [[NEW_VL:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { , , i64 } @llvm.riscv.vlseg2ff.mask.nxv2i32.i64( poison, poison, ptr [[BASE]], [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: [[TMP1:%.*]] = extractvalue { , , i64 } [[TMP0]], 0 +// CHECK-RV64-NEXT: [[TMP2:%.*]] = insertvalue { , } poison, [[TMP1]], 0 +// CHECK-RV64-NEXT: [[TMP3:%.*]] = extractvalue { , , i64 } [[TMP0]], 1 +// CHECK-RV64-NEXT: [[TMP4:%.*]] = insertvalue { , } [[TMP2]], [[TMP3]], 1 +// CHECK-RV64-NEXT: [[TMP5:%.*]] = extractvalue { , , i64 } [[TMP0]], 2 +// CHECK-RV64-NEXT: store i64 [[TMP5]], ptr [[NEW_VL]], align 8 +// CHECK-RV64-NEXT: ret { , } [[TMP4]] +// +vint32m1x2_t test_vlseg2e32ff_v_tuple_i32m1_m(vbool32_t mask, const int32_t *base, size_t *new_vl, size_t vl) { + return __riscv_vlseg2e32ff_v_tuple_i32m1_m(mask, base, new_vl, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vlsseg2e32_tuple.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vlsseg2e32_tuple.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vlsseg2e32_tuple.c @@ -0,0 +1,27 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck 
--check-prefix=CHECK-RV64 %s +#include + +// CHECK-RV64-LABEL: define dso_local { , } @test_vlsseg2e32_v_tuple_i32m1 +// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], i64 noundef [[BSTRIDE:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { , } @llvm.riscv.vlsseg2.nxv2i32.i64( poison, poison, ptr [[BASE]], i64 [[BSTRIDE]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret { , } [[TMP0]] +// +vint32m1x2_t test_vlsseg2e32_v_tuple_i32m1(const int32_t *base, ptrdiff_t bstride, size_t vl) { + return __riscv_vlsseg2e32_v_tuple_i32m1(base, bstride, vl); +} + +// CHECK-RV64-LABEL: define dso_local { , } @test_vlsseg2e32_v_tuple_i32m1_m +// CHECK-RV64-SAME: ( [[MASK:%.*]], ptr noundef [[BASE:%.*]], i64 noundef [[BSTRIDE:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { , } @llvm.riscv.vlsseg2.mask.nxv2i32.i64( poison, poison, ptr [[BASE]], i64 [[BSTRIDE]], [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret { , } [[TMP0]] +// +vint32m1x2_t test_vlsseg2e32_v_tuple_i32m1_m(vbool32_t mask, const int32_t *base, ptrdiff_t bstride, size_t vl) { + return __riscv_vlsseg2e32_v_tuple_i32m1_m(mask, base, bstride, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vluxseg2ei32_tuple.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vluxseg2ei32_tuple.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vluxseg2ei32_tuple.c @@ -0,0 +1,28 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck 
--check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local { , } @test_vluxseg2ei32_v_tuple_i32m1 +// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], [[BINDEX:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { , } @llvm.riscv.vluxseg2.nxv2i32.nxv2i32.i64( poison, poison, ptr [[BASE]], [[BINDEX]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret { , } [[TMP0]] +// +vint32m1x2_t test_vluxseg2ei32_v_tuple_i32m1(const int32_t *base, vuint32m1_t bindex, size_t vl) { + return __riscv_vluxseg2ei32_v_tuple_i32m1(base, bindex, vl); +} + +// CHECK-RV64-LABEL: define dso_local { , } @test_vluxseg2ei32_v_tuple_i32m1_m +// CHECK-RV64-SAME: ( [[MASK:%.*]], ptr noundef [[BASE:%.*]], [[BINDEX:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call { , } @llvm.riscv.vluxseg2.mask.nxv2i32.nxv2i32.i64( poison, poison, ptr [[BASE]], [[BINDEX]], [[MASK]], i64 [[VL]], i64 3) +// CHECK-RV64-NEXT: ret { , } [[TMP0]] +// +vint32m1x2_t test_vluxseg2ei32_v_tuple_i32m1_m(vbool32_t mask, const int32_t *base, vuint32m1_t bindex, size_t vl) { + return __riscv_vluxseg2ei32_v_tuple_i32m1_m(mask, base, bindex, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vset_tuple.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vset_tuple.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vset_tuple.c @@ -0,0 +1,20 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// 
CHECK-RV64-LABEL: define dso_local { , } @test_vset_v_i32m1x2_i32m1 +// CHECK-RV64-SAME: ( [[DEST_COERCE0:%.*]], [[DEST_COERCE1:%.*]], [[VAL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DEST_COERCE0]], 0 +// CHECK-RV64-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DEST_COERCE1]], 1 +// CHECK-RV64-NEXT: [[TMP2:%.*]] = insertvalue { , } [[TMP1]], [[VAL]], 0 +// CHECK-RV64-NEXT: ret { , } [[TMP2]] +// +vint32m1x2_t test_vset_v_i32m1x2_i32m1(vint32m1x2_t dest, vint32m1_t val) { + return __riscv_vset_v_i32m1x2_i32m1(dest, 0, val); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vsoxseg2ei32_tuple.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vsoxseg2ei32_tuple.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vsoxseg2ei32_tuple.c @@ -0,0 +1,36 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei32_v_tuple_i32m1 +// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], [[BINDEX:%.*]], [[V_TUPLE_COERCE0:%.*]], [[V_TUPLE_COERCE1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[V_TUPLE_COERCE0]], 0 +// CHECK-RV64-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[V_TUPLE_COERCE1]], 1 +// CHECK-RV64-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-RV64-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-RV64-NEXT: call void 
@llvm.riscv.vsoxseg2.nxv2i32.nxv2i32.i64( [[TMP2]], [[TMP3]], ptr [[BASE]], [[BINDEX]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei32_v_tuple_i32m1(int32_t *base, vuint32m1_t bindex, vint32m1x2_t v_tuple, size_t vl) { + return __riscv_vsoxseg2ei32_v_tuple_i32m1(base, bindex, v_tuple, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsoxseg2ei32_v_tuple_i32m1_m +// CHECK-RV64-SAME: ( [[MASK:%.*]], ptr noundef [[BASE:%.*]], [[BINDEX:%.*]], [[V_TUPLE_COERCE0:%.*]], [[V_TUPLE_COERCE1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[V_TUPLE_COERCE0]], 0 +// CHECK-RV64-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[V_TUPLE_COERCE1]], 1 +// CHECK-RV64-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-RV64-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-RV64-NEXT: call void @llvm.riscv.vsoxseg2.mask.nxv2i32.nxv2i32.i64( [[TMP2]], [[TMP3]], ptr [[BASE]], [[BINDEX]], [[MASK]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsoxseg2ei32_v_tuple_i32m1_m(vbool32_t mask, int32_t *base, vuint32m1_t bindex, vint32m1x2_t v_tuple, size_t vl) { + return __riscv_vsoxseg2ei32_v_tuple_i32m1_m(mask, base, bindex, v_tuple, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vsseg2e32_tuple.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vsseg2e32_tuple.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vsseg2e32_tuple.c @@ -0,0 +1,31 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// 
RUN: FileCheck --check-prefix=CHECK-RV64 %s +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsseg2e32_v_tuple_i32m1 +// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], [[V_TUPLE_COERCE0:%.*]], [[V_TUPLE_COERCE1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[V_TUPLE_COERCE0]], 0 +// CHECK-RV64-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[V_TUPLE_COERCE1]], 1 +// CHECK-RV64-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-RV64-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-RV64-NEXT: call void @llvm.riscv.vsseg2.nxv2i32.i64( [[TMP2]], [[TMP3]], ptr [[BASE]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsseg2e32_v_tuple_i32m1(int32_t *base, vint32m1x2_t v_tuple, size_t vl) { + return __riscv_vsseg2e32_v_tuple_i32m1(base, v_tuple, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsseg2e32_v_i32m1_m +// CHECK-RV64-SAME: ( [[MASK:%.*]], ptr noundef [[BASE:%.*]], [[V0:%.*]], [[V1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: call void @llvm.riscv.vsseg2.mask.nxv2i32.i64( [[V0]], [[V1]], ptr [[BASE]], [[MASK]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsseg2e32_v_i32m1_m(vbool32_t mask, int32_t *base, vint32m1_t v0, vint32m1_t v1, size_t vl) { + return __riscv_vsseg2e32_v_i32m1_m(mask, base, v0, v1, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vssseg2e32_tuple.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vssseg2e32_tuple.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vssseg2e32_tuple.c @@ -0,0 +1,36 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 
-target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vssseg2e32_v_tuple_i32m1 +// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], i64 noundef [[BSTRIDE:%.*]], [[V_TUPLE_COERCE0:%.*]], [[V_TUPLE_COERCE1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[V_TUPLE_COERCE0]], 0 +// CHECK-RV64-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[V_TUPLE_COERCE1]], 1 +// CHECK-RV64-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-RV64-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-RV64-NEXT: call void @llvm.riscv.vssseg2.nxv2i32.i64( [[TMP2]], [[TMP3]], ptr [[BASE]], i64 [[BSTRIDE]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vssseg2e32_v_tuple_i32m1(int32_t *base, ptrdiff_t bstride, vint32m1x2_t v_tuple, size_t vl) { + return __riscv_vssseg2e32_v_tuple_i32m1(base, bstride, v_tuple, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vssseg2e32_v_tuple_i32m1_m +// CHECK-RV64-SAME: ( [[MASK:%.*]], ptr noundef [[BASE:%.*]], i64 noundef [[BSTRIDE:%.*]], [[V_TUPLE_COERCE0:%.*]], [[V_TUPLE_COERCE1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[V_TUPLE_COERCE0]], 0 +// CHECK-RV64-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[V_TUPLE_COERCE1]], 1 +// CHECK-RV64-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-RV64-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-RV64-NEXT: call void @llvm.riscv.vssseg2.mask.nxv2i32.i64( [[TMP2]], [[TMP3]], ptr [[BASE]], i64 [[BSTRIDE]], [[MASK]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vssseg2e32_v_tuple_i32m1_m(vbool32_t mask, int32_t *base, ptrdiff_t 
bstride, vint32m1x2_t v_tuple, size_t vl) { + return __riscv_vssseg2e32_v_tuple_i32m1_m(mask, base, bstride, v_tuple, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vsuxseg2ei32_tuple.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vsuxseg2ei32_tuple.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vsuxseg2ei32_tuple.c @@ -0,0 +1,36 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ +// RUN: -target-feature +experimental-zvfh -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei32_v_tuple_i32m1 +// CHECK-RV64-SAME: (ptr noundef [[BASE:%.*]], [[BINDEX:%.*]], [[V_TUPLE_COERCE0:%.*]], [[V_TUPLE_COERCE1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[V_TUPLE_COERCE0]], 0 +// CHECK-RV64-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[V_TUPLE_COERCE1]], 1 +// CHECK-RV64-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-RV64-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.nxv2i32.nxv2i32.i64( [[TMP2]], [[TMP3]], ptr [[BASE]], [[BINDEX]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei32_v_tuple_i32m1(int32_t *base, vuint32m1_t bindex, vint32m1x2_t v_tuple, size_t vl) { + return __riscv_vsuxseg2ei32_v_tuple_i32m1(base, bindex, v_tuple, vl); +} + +// CHECK-RV64-LABEL: define dso_local void @test_vsuxseg2ei32_v_tuple_i32m1_m +// CHECK-RV64-SAME: ( [[MASK:%.*]], ptr noundef [[BASE:%.*]], [[BINDEX:%.*]], 
[[V_TUPLE_COERCE0:%.*]], [[V_TUPLE_COERCE1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] { +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[V_TUPLE_COERCE0]], 0 +// CHECK-RV64-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[V_TUPLE_COERCE1]], 1 +// CHECK-RV64-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-RV64-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-RV64-NEXT: call void @llvm.riscv.vsuxseg2.mask.nxv2i32.nxv2i32.i64( [[TMP2]], [[TMP3]], ptr [[BASE]], [[BINDEX]], [[MASK]], i64 [[VL]]) +// CHECK-RV64-NEXT: ret void +// +void test_vsuxseg2ei32_v_tuple_i32m1_m(vbool32_t mask, int32_t *base, vuint32m1_t bindex, vint32m1x2_t v_tuple, size_t vl) { + return __riscv_vsuxseg2ei32_v_tuple_i32m1_m(mask, base, bindex, v_tuple, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-tuple-type.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-tuple-type.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-tuple-type.c @@ -0,0 +1,91 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve32x -O0 \ +// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=O0 +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve32x -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck %s --check-prefix=AFTER_MEM2REG + + +#include + +// Declare local variable +// O0-LABEL: define dso_local void @foo +// O0-SAME: () #[[ATTR0:[0-9]+]] { +// O0-NEXT: entry: +// O0-NEXT: [[V_TUPLE:%.*]] = alloca { , }, align 4 +// O0-NEXT: ret void +// +// AFTER_MEM2REG-LABEL: define dso_local void @foo +// AFTER_MEM2REG-SAME: () #[[ATTR0:[0-9]+]] { +// AFTER_MEM2REG-NEXT: entry: +// AFTER_MEM2REG-NEXT: ret void +// +void foo() { + __rvv_int32m1x2_t v_tuple; +} + +// Declare local variable and 
return +// O0-LABEL: define dso_local { , } @bar +// O0-SAME: () #[[ATTR0]] { +// O0-NEXT: entry: +// O0-NEXT: [[V_TUPLE:%.*]] = alloca { , }, align 4 +// O0-NEXT: [[TMP0:%.*]] = load { , }, ptr [[V_TUPLE]], align 4 +// O0-NEXT: ret { , } [[TMP0]] +// +// AFTER_MEM2REG-LABEL: define dso_local { , } @bar +// AFTER_MEM2REG-SAME: () #[[ATTR0]] { +// AFTER_MEM2REG-NEXT: entry: +// AFTER_MEM2REG-NEXT: ret { , } undef +// +__rvv_int32m1x2_t bar() { + __rvv_int32m1x2_t v_tuple; + return v_tuple; +} + +// Pass as function parameter +// O0-LABEL: define dso_local void @baz +// O0-SAME: ( [[V_TUPLE_COERCE0:%.*]], [[V_TUPLE_COERCE1:%.*]]) #[[ATTR0]] { +// O0-NEXT: entry: +// O0-NEXT: [[V_TUPLE:%.*]] = alloca { , }, align 4 +// O0-NEXT: [[V_TUPLE_ADDR:%.*]] = alloca { , }, align 4 +// O0-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[V_TUPLE_COERCE0]], 0 +// O0-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[V_TUPLE_COERCE1]], 1 +// O0-NEXT: store { , } [[TMP1]], ptr [[V_TUPLE]], align 4 +// O0-NEXT: [[V_TUPLE1:%.*]] = load { , }, ptr [[V_TUPLE]], align 4 +// O0-NEXT: store { , } [[V_TUPLE1]], ptr [[V_TUPLE_ADDR]], align 4 +// O0-NEXT: ret void +// +// AFTER_MEM2REG-LABEL: define dso_local void @baz +// AFTER_MEM2REG-SAME: ( [[V_TUPLE_COERCE0:%.*]], [[V_TUPLE_COERCE1:%.*]]) #[[ATTR0]] { +// AFTER_MEM2REG-NEXT: entry: +// AFTER_MEM2REG-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[V_TUPLE_COERCE0]], 0 +// AFTER_MEM2REG-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[V_TUPLE_COERCE1]], 1 +// AFTER_MEM2REG-NEXT: ret void +// +void baz(__rvv_int32m1x2_t v_tuple) { +} + +// Pass as function parameter and return +// O0-LABEL: define dso_local { , } @qux +// O0-SAME: ( [[V_TUPLE_COERCE0:%.*]], [[V_TUPLE_COERCE1:%.*]]) #[[ATTR0]] { +// O0-NEXT: entry: +// O0-NEXT: [[V_TUPLE:%.*]] = alloca { , }, align 4 +// O0-NEXT: [[V_TUPLE_ADDR:%.*]] = alloca { , }, align 4 +// O0-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[V_TUPLE_COERCE0]], 0 +// O0-NEXT: [[TMP1:%.*]] = 
insertvalue { , } [[TMP0]], [[V_TUPLE_COERCE1]], 1 +// O0-NEXT: store { , } [[TMP1]], ptr [[V_TUPLE]], align 4 +// O0-NEXT: [[V_TUPLE1:%.*]] = load { , }, ptr [[V_TUPLE]], align 4 +// O0-NEXT: store { , } [[V_TUPLE1]], ptr [[V_TUPLE_ADDR]], align 4 +// O0-NEXT: [[TMP2:%.*]] = load { , }, ptr [[V_TUPLE_ADDR]], align 4 +// O0-NEXT: ret { , } [[TMP2]] +// +// AFTER_MEM2REG-LABEL: define dso_local { , } @qux +// AFTER_MEM2REG-SAME: ( [[V_TUPLE_COERCE0:%.*]], [[V_TUPLE_COERCE1:%.*]]) #[[ATTR0]] { +// AFTER_MEM2REG-NEXT: entry: +// AFTER_MEM2REG-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[V_TUPLE_COERCE0]], 0 +// AFTER_MEM2REG-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[V_TUPLE_COERCE1]], 1 +// AFTER_MEM2REG-NEXT: ret { , } [[TMP1]] +// +__rvv_int32m1x2_t qux(__rvv_int32m1x2_t v_tuple) { + return v_tuple; +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/vget-index-out-of-range.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/vget-index-out-of-range.c --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/vget-index-out-of-range.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/vget-index-out-of-range.c @@ -339,3 +339,8 @@ // expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}} return __riscv_vget_v_f16m8_f16m4(src, 2); } + +vint32m1_t test_vget_v_i32m1x2_i32m1(vint32m1x2_t src) { + // expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}} + return __riscv_vget_v_i32m1x2_i32m1(src, 2); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/vset-index-out-of-range.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/vset-index-out-of-range.c --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/vset-index-out-of-range.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/vset-index-out-of-range.c @@ -339,3 +339,8 @@ // expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}} return __riscv_vset_v_f16m4_f16m8(dest, 2, val); } + +vint32m1x2_t 
test_vset_v_i32m1x2_i32m1(vint32m1x2_t dest, vint32m1_t val) { + // expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}} + return __riscv_vset_v_i32m1x2_i32m1(dest, 2, val); +} diff --git a/clang/test/CodeGen/assignment-tracking/flag.cpp b/clang/test/CodeGen/assignment-tracking/flag.cpp --- a/clang/test/CodeGen/assignment-tracking/flag.cpp +++ b/clang/test/CodeGen/assignment-tracking/flag.cpp @@ -8,10 +8,10 @@ // RUN: -emit-llvm %s -o - -fexperimental-assignment-tracking=disabled -O1\ // RUN: | FileCheck %s --check-prefixes=DISABLE -//// Enabled by default: +//// Disabled by default: // RUN: %clang_cc1 -triple x86_64-none-linux-gnu -debug-info-kind=standalone \ // RUN: -emit-llvm %s -o - -O1 \ -// RUN: | FileCheck %s --check-prefixes=ENABLE +// RUN: | FileCheck %s --check-prefixes=DISABLE //// Disabled at O0 unless forced. // RUN: %clang_cc1 -triple x86_64-none-linux-gnu -debug-info-kind=standalone \ diff --git a/clang/test/CodeGen/ubsan-function.c b/clang/test/CodeGen/ubsan-function.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/ubsan-function.c @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -emit-llvm -triple x86_64 -std=c17 -fsanitize=function %s -o - | FileCheck %s + +// CHECK-LABEL: define{{.*}} @call_no_prototype( +// CHECK-NOT: __ubsan_handle_function_type_mismatch +void call_no_prototype(void (*f)()) { f(); } + +// CHECK-LABEL: define{{.*}} @call_prototype( +// CHECK: __ubsan_handle_function_type_mismatch +void call_prototype(void (*f)(void)) { f(); } diff --git a/clang/test/PCH/asm-label.cpp b/clang/test/PCH/asm-label.cpp new file mode 100644 --- /dev/null +++ b/clang/test/PCH/asm-label.cpp @@ -0,0 +1,23 @@ +// RUN: %clang_cc1 -emit-pch %s -o %t +// RUN: %clang_cc1 -include-pch %t %s -verify +#ifndef HEADER_H +#define HEADER_H +template +void MyMethod() { + void *bar; + some_path: + asm goto + ( + "mov %w[foo], %w[foo]" + : [foo] "=r"(bar) + : [foo2] "r"(bar), [foo3] "r"(bar), [foo4] "r"(bar) + : + : some_path + ); + } +#else +void 
test() { + MyMethod(); +// expected-no-diagnostics +} +#endif diff --git a/clang/test/Sema/flexible-array-in-union.c b/clang/test/Sema/flexible-array-in-union.c new file mode 100644 --- /dev/null +++ b/clang/test/Sema/flexible-array-in-union.c @@ -0,0 +1,13 @@ +// RUN: %clang_cc1 %s -verify=c -fsyntax-only +// RUN: %clang_cc1 %s -verify -fsyntax-only -x c++ +// RUN: %clang_cc1 %s -verify -fsyntax-only -fms-compatibility +// RUN: %clang_cc1 %s -verify -fsyntax-only -fms-compatibility -x c++ + +// The test checks that an attempt to initialize union with flexible array +// member with an initializer list doesn't crash clang. + + +union { char x[]; } r = {0}; // c-error {{flexible array member 'x' in a union is not allowed}} + +// expected-no-diagnostics + diff --git a/clang/test/Sema/riscv-types.c b/clang/test/Sema/riscv-types.c --- a/clang/test/Sema/riscv-types.c +++ b/clang/test/Sema/riscv-types.c @@ -133,6 +133,9 @@ // CHECK: __rvv_int8mf2_t x43; __rvv_int8mf2_t x43; + + // CHECK: __rvv_int32m1x2_t x44; + __rvv_int32m1x2_t x44; } typedef __rvv_bool4_t vbool4_t; diff --git a/clang/unittests/Analysis/FlowSensitive/SignAnalysisTest.cpp b/clang/unittests/Analysis/FlowSensitive/SignAnalysisTest.cpp --- a/clang/unittests/Analysis/FlowSensitive/SignAnalysisTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/SignAnalysisTest.cpp @@ -114,12 +114,10 @@ return {nullptr, {}, {}}; // Value of the unary op. - auto *UnaryOpValue = State.Env.getValue(*UO, SkipPast::None); + auto *UnaryOpValue = State.Env.getValueStrict(*UO); if (!UnaryOpValue) { - auto &Loc = State.Env.createStorageLocation(*UO); - State.Env.setStorageLocation(*UO, Loc); UnaryOpValue = &State.Env.makeAtomicBoolValue(); - State.Env.setValue(Loc, *UnaryOpValue); + State.Env.setValueStrict(*UO, *UnaryOpValue); } // Properties for the operand (sub expression). 
@@ -133,22 +131,17 @@ void transferBinary(const BinaryOperator *BO, const MatchFinder::MatchResult &M, LatticeTransferState &State) { - StorageLocation *Loc = State.Env.getStorageLocation(*BO, SkipPast::None); - if (!Loc) { - Loc = &State.Env.createStorageLocation(*BO); - State.Env.setStorageLocation(*BO, *Loc); - } - BoolValue *Comp = cast_or_null(State.Env.getValue(*Loc)); + BoolValue *Comp = cast_or_null(State.Env.getValueStrict(*BO)); if (!Comp) { Comp = &State.Env.makeAtomicBoolValue(); - State.Env.setValue(*Loc, *Comp); + State.Env.setValueStrict(*BO, *Comp); } // FIXME Use this as well: // auto *NegatedComp = &State.Env.makeNot(*Comp); - auto *LHS = State.Env.getValue(*BO->getLHS(), SkipPast::None); - auto *RHS = State.Env.getValue(*BO->getRHS(), SkipPast::None); + auto *LHS = State.Env.getValueStrict(*BO->getLHS()); + auto *RHS = State.Env.getValueStrict(*BO->getRHS()); if (!LHS || !RHS) return; @@ -244,19 +237,43 @@ } } +// Returns the `Value` associated with `E` (which may be either a prvalue or +// glvalue). Creates a `Value` or `StorageLocation` as needed if `E` does not +// have either of these associated with it yet. +// +// If this functionality turns out to be needed in more cases, this function +// should be moved to a more central location. 
+Value *getOrCreateValue(const Expr *E, Environment &Env) { + Value *Val = nullptr; + if (E->isGLValue()) { + StorageLocation *Loc = Env.getStorageLocationStrict(*E); + if (!Loc) { + Loc = &Env.createStorageLocation(*E); + Env.setStorageLocationStrict(*E, *Loc); + } + Val = Env.getValue(*Loc); + if (!Val) { + Val = Env.createValue(E->getType()); + Env.setValue(*Loc, *Val); + } + } else { + Val = Env.getValueStrict(*E); + if (!Val) { + Val = Env.createValue(E->getType()); + Env.setValueStrict(*E, *Val); + } + } + assert(Val != nullptr); + + return Val; +} + void transferExpr(const Expr *E, const MatchFinder::MatchResult &M, LatticeTransferState &State) { const ASTContext &Context = *M.Context; - StorageLocation *Loc = State.Env.getStorageLocation(*E, SkipPast::None); - if (!Loc) { - Loc = &State.Env.createStorageLocation(*E); - State.Env.setStorageLocation(*E, *Loc); - } - Value *Val = State.Env.getValue(*Loc); - if (!Val) { - Val = State.Env.createValue(Context.IntTy); - State.Env.setValue(*Loc, *Val); - } + + Value *Val = getOrCreateValue(E, State.Env); + // The sign symbolic values have been initialized already. if (Val->getProperty("neg")) return; diff --git a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp --- a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp @@ -296,6 +296,22 @@ UnorderedElementsAre("foo")))))); } +TEST_F(NoreturnDestructorTest, + ConditionalOperatorConstantCondition_LeftBranchReturns) { + std::string Code = R"( + #include "noreturn_destructor_test_defs.h" + + void target() { + int value = true ? 
foo() : Fatal().bar(); + (void)0; + // [[p]] + } + )"; + runDataflow(Code, UnorderedElementsAre(IsStringMapEntry( + "p", HoldsFunctionCallLattice(HasCalledFunctions( + UnorderedElementsAre("foo")))))); +} + TEST_F(NoreturnDestructorTest, ConditionalOperatorRightBranchReturns) { std::string Code = R"( #include "noreturn_destructor_test_defs.h" @@ -311,6 +327,22 @@ UnorderedElementsAre("foo")))))); } +TEST_F(NoreturnDestructorTest, + ConditionalOperatorConstantCondition_RightBranchReturns) { + std::string Code = R"( + #include "noreturn_destructor_test_defs.h" + + void target() { + int value = false ? Fatal().bar() : foo(); + (void)0; + // [[p]] + } + )"; + runDataflow(Code, UnorderedElementsAre(IsStringMapEntry( + "p", HoldsFunctionCallLattice(HasCalledFunctions( + UnorderedElementsAre("foo")))))); +} + TEST_F(NoreturnDestructorTest, ConditionalOperatorNestedBranchesDoNotReturn) { std::string Code = R"( #include "noreturn_destructor_test_defs.h" diff --git a/clang/unittests/Analysis/FlowSensitive/UncheckedOptionalAccessModelTest.cpp b/clang/unittests/Analysis/FlowSensitive/UncheckedOptionalAccessModelTest.cpp --- a/clang/unittests/Analysis/FlowSensitive/UncheckedOptionalAccessModelTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/UncheckedOptionalAccessModelTest.cpp @@ -2764,9 +2764,6 @@ } TEST_P(UncheckedOptionalAccessTest, OptionalValueInitialization) { - // FIXME: Fix when to initialize `value`. All unwrapping should be safe in - // this example, but `value` initialization is done multiple times during the - // fixpoint iterations and joining the environment won't correctly merge them. ExpectDiagnosticsFor( R"( #include "unchecked_optional_access_test.h" @@ -2786,7 +2783,7 @@ } // Now we merge the two values. UncheckedOptionalAccessModel::merge() will // throw away the "value" property. 
- foo->value(); // [[unsafe]] + foo->value(); } )"); } diff --git a/clang/utils/TableGen/RISCVVEmitter.cpp b/clang/utils/TableGen/RISCVVEmitter.cpp --- a/clang/utils/TableGen/RISCVVEmitter.cpp +++ b/clang/utils/TableGen/RISCVVEmitter.cpp @@ -65,6 +65,7 @@ bool HasMaskedOffOperand :1; bool HasTailPolicy : 1; bool HasMaskPolicy : 1; + bool IsTuple : 1; uint8_t UnMaskedPolicyScheme : 2; uint8_t MaskedPolicyScheme : 2; }; @@ -363,6 +364,16 @@ TypeModifier::UnsignedInteger)); printType(*UT); } + // FIXME: Expand more type declaration + if (I == 'i' && Log2LMUL == 0) { // vint32m1x2_t + auto TupleT = TypeCache.computeType( + BT, Log2LMUL, + PrototypeDescriptor(BaseTypeModifier::Vector, + VectorTypeModifier::Tuple2, + TypeModifier::SignedInteger)); + if (TupleT) + printType(*TupleT); + } } } @@ -512,6 +523,7 @@ StringRef IRName = R->getValueAsString("IRName"); StringRef MaskedIRName = R->getValueAsString("MaskedIRName"); unsigned NF = R->getValueAsInt("NF"); + bool IsTuple = R->getValueAsBit("IsTuple"); const Policy DefaultPolicy; SmallVector SupportedUnMaskedPolicies = @@ -532,10 +544,10 @@ auto Prototype = RVVIntrinsic::computeBuiltinTypes( BasicPrototype, /*IsMasked=*/false, /*HasMaskedOffOperand=*/false, HasVL, NF, UnMaskedPolicyScheme, - DefaultPolicy); + DefaultPolicy, IsTuple); auto MaskedPrototype = RVVIntrinsic::computeBuiltinTypes( BasicPrototype, /*IsMasked=*/true, HasMaskedOffOperand, HasVL, NF, - MaskedPolicyScheme, DefaultPolicy); + MaskedPolicyScheme, DefaultPolicy, IsTuple); // Create Intrinsics for each type and LMUL. 
for (char I : TypeRange) { @@ -564,7 +576,7 @@ RVVIntrinsic::computeBuiltinTypes( BasicPrototype, /*IsMasked=*/false, /*HasMaskedOffOperand=*/false, HasVL, NF, - UnMaskedPolicyScheme, P); + UnMaskedPolicyScheme, P, IsTuple); std::optional PolicyTypes = TypeCache.computeTypes(BT, Log2LMUL, NF, PolicyPrototype); Out.push_back(std::make_unique( @@ -590,7 +602,7 @@ SmallVector PolicyPrototype = RVVIntrinsic::computeBuiltinTypes( BasicPrototype, /*IsMasked=*/true, HasMaskedOffOperand, HasVL, - NF, MaskedPolicyScheme, P); + NF, MaskedPolicyScheme, P, IsTuple); std::optional PolicyTypes = TypeCache.computeTypes(BT, Log2LMUL, NF, PolicyPrototype); Out.push_back(std::make_unique( @@ -650,6 +662,7 @@ SR.Prototype = std::move(BasicPrototype); SR.Suffix = parsePrototypes(SuffixProto); SR.OverloadedSuffix = parsePrototypes(OverloadedSuffixProto); + SR.IsTuple = IsTuple; SemaRecords->push_back(SR); } @@ -691,6 +704,7 @@ R.HasMaskPolicy = SR.HasMaskPolicy; R.UnMaskedPolicyScheme = SR.UnMaskedPolicyScheme; R.MaskedPolicyScheme = SR.MaskedPolicyScheme; + R.IsTuple = SR.IsTuple; assert(R.PrototypeIndex != static_cast(SemaSignatureTable::INVALID_INDEX)); diff --git a/compiler-rt/test/ubsan/TestCases/TypeCheck/Function/c.c b/compiler-rt/test/ubsan/TestCases/TypeCheck/Function/c.c new file mode 100644 --- /dev/null +++ b/compiler-rt/test/ubsan/TestCases/TypeCheck/Function/c.c @@ -0,0 +1,14 @@ +// RUN: %clang -g -fsanitize=function %s -o %t +// RUN: %run %t 2>&1 | FileCheck %s --check-prefix=CHECK --implicit-check-not='runtime error:' + +void f(void (*fp)(int (*)[])) { fp(0); } + +void callee0(int (*a)[]) {} +void callee1(int (*a)[1]) {} + +int main() { + int a[1]; + f(callee0); + // CHECK: runtime error: call to function callee1 through pointer to incorrect function type 'void (*)(int (*)[])' + f(callee1); // compatible type in C, but flagged +} diff --git a/compiler-rt/test/ubsan/TestCases/TypeCheck/Function/function.cpp 
b/compiler-rt/test/ubsan/TestCases/TypeCheck/Function/function.cpp --- a/compiler-rt/test/ubsan/TestCases/TypeCheck/Function/function.cpp +++ b/compiler-rt/test/ubsan/TestCases/TypeCheck/Function/function.cpp @@ -1,6 +1,3 @@ -// Work around "Cannot represent a difference across sections" -// UNSUPPORTED: target=powerpc64-{{.*}} - // RUN: %clangxx -DDETERMINE_UNIQUE %s -o %t-unique // RUN: %clangxx -std=c++17 -fsanitize=function %s -O3 -g -DSHARED_LIB -fPIC -shared -o %t-so.so // RUN: %clangxx -std=c++17 -fsanitize=function %s -O3 -g -o %t %t-so.so diff --git a/compiler-rt/test/ubsan/TestCases/TypeCheck/Function/lit.local.cfg.py b/compiler-rt/test/ubsan/TestCases/TypeCheck/Function/lit.local.cfg.py --- a/compiler-rt/test/ubsan/TestCases/TypeCheck/Function/lit.local.cfg.py +++ b/compiler-rt/test/ubsan/TestCases/TypeCheck/Function/lit.local.cfg.py @@ -1,5 +1,8 @@ if config.host_os not in ['Darwin', 'FreeBSD', 'Linux', 'NetBSD']: config.unsupported = True +# Work around "Cannot represent a difference across sections" +if config.target_arch == 'powerpc64': + config.unsupported = True # Work around "library ... not found: needed by main executable" in qemu. if config.android and config.target_arch not in ['x86', 'x86_64']: config.unsupported = True diff --git a/flang/docs/Extensions.md b/flang/docs/Extensions.md --- a/flang/docs/Extensions.md +++ b/flang/docs/Extensions.md @@ -363,6 +363,7 @@ * Constraint C1406, which prohibits the same module name from being used in a scope for both an intrinsic and a non-intrinsic module, is implemented as a portability warning only, not a hard error. +* IBM @PROCESS directive is accepted but ignored. 
## Preprocessing behavior diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h --- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h +++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h @@ -23,6 +23,10 @@ class FirOpBuilder; } +namespace mlir { +class IRMapping; +} + namespace hlfir { class AssociateOp; @@ -359,13 +363,18 @@ mlir::ValueRange typeParams, const ElementalKernelGenerator &genKernel); +/// Structure to describe a loop nest. +struct LoopNest { + fir::DoLoopOp outerLoop; + fir::DoLoopOp innerLoop; + llvm::SmallVector oneBasedIndices; +}; + /// Generate a fir.do_loop nest looping from 1 to extents[i]. -/// Return the inner fir.do_loop and the indices of the loops. -std::pair> -genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents); -inline std::pair> -genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, mlir::Value shape) { +LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, + mlir::ValueRange extents); +inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, + mlir::Value shape) { return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape)); } @@ -379,6 +388,20 @@ hlfir::ElementalOp elemental, mlir::ValueRange oneBasedIndices); +/// Inline the body of an hlfir.elemental without cloning the resulting +/// hlfir.yield_element, and return the cloned operand of the +/// hlfir.yield_element. The mapper must be provided to cover complex cases +/// where the inlined elemental is not defined in the current context and uses +/// values that have been cloned already. +/// A callback is provided to indicate if an hlfir.apply inside the +/// hlfir.elemental must be immediately replaced by the inlining of the +/// applied hlfir.elemental. 
+mlir::Value inlineElementalOp( + mlir::Location loc, fir::FirOpBuilder &builder, + hlfir::ElementalOp elemental, mlir::ValueRange oneBasedIndices, + mlir::IRMapping &mapper, + const std::function &mustRecursivelyInline); + std::pair> convertToValue(mlir::Location loc, fir::FirOpBuilder &builder, const hlfir::Entity &entity); diff --git a/flang/include/flang/Optimizer/Dialect/FIRDialect.h b/flang/include/flang/Optimizer/Dialect/FIRDialect.h --- a/flang/include/flang/Optimizer/Dialect/FIRDialect.h +++ b/flang/include/flang/Optimizer/Dialect/FIRDialect.h @@ -47,6 +47,9 @@ void registerAttributes(); // Register the Types of this dialect. void registerTypes(); + // Register external interfaces on operations of + // this dialect. + void registerOpExternalInterfaces(); }; /// The FIR codegen dialect is a dialect containing a small set of transient diff --git a/flang/include/flang/Optimizer/Dialect/FIRType.h b/flang/include/flang/Optimizer/Dialect/FIRType.h --- a/flang/include/flang/Optimizer/Dialect/FIRType.h +++ b/flang/include/flang/Optimizer/Dialect/FIRType.h @@ -359,7 +359,7 @@ mlir::Type fromRealTypeID(mlir::MLIRContext *context, llvm::Type::TypeID typeID, fir::KindTy kind); -int getTypeCode(mlir::Type ty, KindMapping &kindMap); +int getTypeCode(mlir::Type ty, const KindMapping &kindMap); inline bool BaseBoxType::classof(mlir::Type type) { return type.isa(); @@ -413,6 +413,14 @@ return fir::unwrapRefType(t).isa(); } +/// Return a string representation of `ty`. The fir.ref is omitted in the +/// representation. 
+/// +/// fir.array<10x10xf32> -> prefix_10x10xf32 +/// fir.ref -> i32 +std::string getTypeAsString(mlir::Type ty, const KindMapping &kindMap, + llvm::StringRef prefix = ""); + } // namespace fir #endif // FORTRAN_OPTIMIZER_DIALECT_FIRTYPE_H diff --git a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td --- a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td +++ b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td @@ -644,10 +644,14 @@ let extraClassDeclaration = [{ mlir::Block *getBody() { return &getRegion().front(); } - // Get the indices iterating over the shape. + /// Get the indices iterating over the shape. mlir::Block::BlockArgListType getIndices() { return getBody()->getArguments(); } + + /// Must this elemental be evaluated in order? + /// TODO: add attribute and set it in lowering. + bool isOrdered() {return true;} }]; let skipDefaultBuilders = 1; diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp --- a/flang/lib/Lower/ConvertCall.cpp +++ b/flang/lib/Lower/ConvertCall.cpp @@ -1517,11 +1517,10 @@ // iterations are cleaned up inside the iterations. if (!callContext.resultType) { // Subroutine case. Generate call inside loop nest. 
- auto [innerLoop, oneBasedIndicesVector] = - hlfir::genLoopNest(loc, builder, shape); - mlir::ValueRange oneBasedIndices = oneBasedIndicesVector; + hlfir::LoopNest loopNest = hlfir::genLoopNest(loc, builder, shape); + mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices; auto insPt = builder.saveInsertionPoint(); - builder.setInsertionPointToStart(innerLoop.getBody()); + builder.setInsertionPointToStart(loopNest.innerLoop.getBody()); callContext.stmtCtx.pushScope(); for (auto &preparedActual : loweredActuals) if (preparedActual) diff --git a/flang/lib/Lower/ConvertConstant.cpp b/flang/lib/Lower/ConvertConstant.cpp --- a/flang/lib/Lower/ConvertConstant.cpp +++ b/flang/lib/Lower/ConvertConstant.cpp @@ -20,6 +20,8 @@ #include "flang/Optimizer/Builder/Complex.h" #include "flang/Optimizer/Builder/Todo.h" +#include + /// Convert string, \p s, to an APFloat value. Recognize and handle Inf and /// NaN strings as well. \p s is assumed to not contain any spaces. static llvm::APFloat consAPFloat(const llvm::fltSemantics &fsem, @@ -66,17 +68,12 @@ /// Helper class to lower an array constant to a global with an MLIR dense /// attribute. /// -/// If we have a rank-1 array of integer, real, or logical, then we can +/// If we have an array of integer, real, or logical, then we can /// create a global array with the dense attribute. /// /// The mlir tensor type can only handle integer, real, or logical. It /// does not currently support nested structures which is required for /// complex. -/// -/// Also, we currently handle just rank-1 since tensor type assumes -/// row major array ordering. We will need to reorder the dimensions -/// in the tensor type to support Fortran's column major array ordering. -/// How to create this tensor type is to be determined. 
class DenseGlobalBuilder { public: static fir::GlobalOp tryCreating(fir::FirOpBuilder &builder, @@ -124,8 +121,6 @@ &constant) { static_assert(TC != Fortran::common::TypeCategory::Character, "must be numerical or logical"); - if (constant.Rank() != 1) - return; auto attrTc = TC == Fortran::common::TypeCategory::Logical ? Fortran::common::TypeCategory::Integer : TC; @@ -158,12 +153,16 @@ llvm::StringRef globalName, mlir::StringAttr linkage, bool isConst) const { - // Not a rank 1 "trivial" intrinsic constant array, or empty array. + // Not a "trivial" intrinsic constant array, or empty array. if (!attributeElementType || attributes.empty()) return {}; + assert(symTy.isa() && "expecting an array global"); + auto arrTy = symTy.cast(); + llvm::SmallVector tensorShape(arrTy.getShape()); + std::reverse(tensorShape.begin(), tensorShape.end()); auto tensorTy = - mlir::RankedTensorType::get(attributes.size(), attributeElementType); + mlir::RankedTensorType::get(tensorShape, attributeElementType); auto init = mlir::DenseElementsAttr::get(tensorTy, attributes); return builder.createGlobal(loc, symTy, globalName, linkage, init, isConst); } @@ -544,6 +543,13 @@ true, constant); } if (!global) + // If the number of elements of the array is huge, the compilation may + // use a lot of memory and take a very long time to complete. + // Empirical evidence shows that an array with 150000 elements of + // complex type takes roughly 30 seconds to compile and uses 4GB of RAM, + // on a modern machine. + // It would be nice to add a driver switch to control the array size + // after which flang should not continue to compile. 
global = builder.createGlobalConstant( loc, arrayTy, globalName, [&](fir::FirOpBuilder &builder) { diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp --- a/flang/lib/Lower/ConvertVariable.cpp +++ b/flang/lib/Lower/ConvertVariable.cpp @@ -431,14 +431,9 @@ // If this is an array, check to see if we can use a dense attribute // with a tensor mlir type. This optimization currently only supports - // rank-1 Fortran arrays of integer, real, or logical. The tensor - // type does not support nested structures which are needed for - // complex numbers. - // To get multidimensional arrays to work, we will have to use column major - // array ordering with the tensor type (so it matches column major ordering - // with the Fortran fir.array). By default, tensor types assume row major - // ordering. How to create this tensor type is to be determined. - if (symTy.isa() && sym.Rank() == 1 && + // Fortran arrays of integer, real, or logical. The tensor type does + // not support nested structures which are needed for complex numbers. 
+ if (symTy.isa() && !Fortran::semantics::IsAllocatableOrPointer(sym)) { mlir::Type eleTy = symTy.cast().getEleTy(); if (eleTy.isa()) { diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -478,6 +478,57 @@ } } +static mlir::acc::PrivateRecipeOp +createBasePrivateRecipeOp(fir::FirOpBuilder &builder, mlir::Value input, + llvm::StringRef recipeName, mlir::Location loc) { + mlir::ModuleOp mod = builder.getModule(); + mlir::OpBuilder modBuilder(mod.getBodyRegion()); + mlir::Type ty = input.getType(); + auto recipe = + modBuilder.create(loc, recipeName, ty); + builder.createBlock(&recipe.getInitRegion(), recipe.getInitRegion().end(), + {ty}, {loc}); + builder.setInsertionPointToEnd(&recipe.getInitRegion().back()); + builder.create( + loc, recipe.getInitRegion().front().getArgument(0)); + return recipe; +} + +static void +genPrivatizations(const Fortran::parser::AccObjectList &objectList, + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semanticsContext, + Fortran::lower::StatementContext &stmtCtx, + llvm::SmallVectorImpl &dataOperands, + llvm::SmallVector &privatizations) { + fir::FirOpBuilder &builder = converter.getFirOpBuilder(); + mlir::ModuleOp mod = builder.getModule(); + for (const auto &accObject : objectList.v) { + llvm::SmallVector bounds; + std::stringstream asFortran; + mlir::Location operandLocation = genOperandLocation(converter, accObject); + mlir::Value baseAddr = gatherDataOperandAddrAndBounds( + converter, builder, semanticsContext, stmtCtx, accObject, + operandLocation, asFortran, bounds); + + std::string recipeName = fir::getTypeAsString( + baseAddr.getType(), converter.getKindMap(), "privatization"); + if (auto recipe = + mod.lookupSymbol(recipeName)) { + privatizations.push_back(mlir::SymbolRefAttr::get( + builder.getContext(), recipe.getSymName().str())); + } else { + auto crtPos = builder.saveInsertionPoint(); + 
mlir::acc::PrivateRecipeOp newRecipe = createBasePrivateRecipeOp( + builder, baseAddr, recipeName, operandLocation); + builder.restoreInsertionPoint(crtPos); + privatizations.push_back(mlir::SymbolRefAttr::get( + builder.getContext(), newRecipe.getSymName().str())); + } + dataOperands.push_back(baseAddr); + } +} + template static void genObjectListWithModifier( const Clause *x, Fortran::lower::AbstractConverter &converter, @@ -633,7 +684,7 @@ Fortran::semantics::SemanticsContext &semanticsContext, Fortran::lower::StatementContext &stmtCtx, const Fortran::parser::AccClauseList &accClauseList) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + fir::FirOpBuilder &builder = converter.getFirOpBuilder(); mlir::Value workerNum; mlir::Value vectorNum; @@ -641,6 +692,7 @@ mlir::Value gangStatic; llvm::SmallVector tileOperands, privateOperands, reductionOperands; + llvm::SmallVector privatizations; bool hasGang = false, hasVector = false, hasWorker = false; for (const Fortran::parser::AccClause &clause : accClauseList.v) { @@ -665,8 +717,8 @@ } else { // * was passed as value and will be represented as a special // constant. - gangStatic = firOpBuilder.createIntegerConstant( - clauseLocation, firOpBuilder.getIndexType(), starCst); + gangStatic = builder.createIntegerConstant( + clauseLocation, builder.getIndexType(), starCst); } } } @@ -698,8 +750,8 @@ } else { // * was passed as value and will be represented as a -1 constant // integer. 
- mlir::Value tileStar = firOpBuilder.createIntegerConstant( - clauseLocation, firOpBuilder.getIntegerType(32), + mlir::Value tileStar = builder.createIntegerConstant( + clauseLocation, builder.getIntegerType(32), /* STAR */ -1); tileOperands.push_back(tileStar); } @@ -707,8 +759,8 @@ } else if (const auto *privateClause = std::get_if( &clause.u)) { - genObjectList(privateClause->v, converter, semanticsContext, stmtCtx, - privateOperands); + genPrivatizations(privateClause->v, converter, semanticsContext, stmtCtx, + privateOperands, privatizations); } else if (std::get_if(&clause.u)) { // Reduction clause is left out for the moment as the clause will probably // end up having its own operation. @@ -728,14 +780,18 @@ addOperands(operands, operandSegments, reductionOperands); auto loopOp = createRegionOp( - firOpBuilder, currentLocation, operands, operandSegments); + builder, currentLocation, operands, operandSegments); if (hasGang) - loopOp.setHasGangAttr(firOpBuilder.getUnitAttr()); + loopOp.setHasGangAttr(builder.getUnitAttr()); if (hasWorker) - loopOp.setHasWorkerAttr(firOpBuilder.getUnitAttr()); + loopOp.setHasWorkerAttr(builder.getUnitAttr()); if (hasVector) - loopOp.setHasVectorAttr(firOpBuilder.getUnitAttr()); + loopOp.setHasVectorAttr(builder.getUnitAttr()); + + if (!privatizations.empty()) + loopOp.setPrivatizationsAttr( + mlir::ArrayAttr::get(builder.getContext(), privatizations)); // Lower clauses mapped to attributes for (const Fortran::parser::AccClause &clause : accClauseList.v) { @@ -745,16 +801,16 @@ const std::optional collapseValue = Fortran::evaluate::ToInt64(*expr); if (collapseValue) { - loopOp.setCollapseAttr(firOpBuilder.getI64IntegerAttr(*collapseValue)); + loopOp.setCollapseAttr(builder.getI64IntegerAttr(*collapseValue)); } } else if (std::get_if(&clause.u)) { - loopOp.setSeqAttr(firOpBuilder.getUnitAttr()); + loopOp.setSeqAttr(builder.getUnitAttr()); } else if (std::get_if( &clause.u)) { - 
loopOp.setIndependentAttr(firOpBuilder.getUnitAttr()); + loopOp.setIndependentAttr(builder.getUnitAttr()); } else if (std::get_if(&clause.u)) { loopOp->setAttr(mlir::acc::LoopOp::getAutoAttrStrName(), - firOpBuilder.getUnitAttr()); + builder.getUnitAttr()); } } return loopOp; @@ -824,9 +880,9 @@ copyEntryOperands, copyoutEntryOperands, createEntryOperands, dataClauseOperands; - // TODO: need to more work/design. llvm::SmallVector reductionOperands, privateOperands, firstprivateOperands; + llvm::SmallVector privatizations; // Async, wait and self clause have optional values but can be present with // no value as well. When there is no value, the op has an attribute to @@ -973,8 +1029,8 @@ } else if (const auto *privateClause = std::get_if( &clause.u)) { - genObjectList(privateClause->v, converter, semanticsContext, stmtCtx, - privateOperands); + genPrivatizations(privateClause->v, converter, semanticsContext, stmtCtx, + privateOperands, privatizations); } else if (const auto *firstprivateClause = std::get_if( &clause.u)) { @@ -1019,6 +1075,12 @@ if (addSelfAttr) computeOp.setSelfAttrAttr(builder.getUnitAttr()); + if constexpr (!std::is_same_v) { + if (!privatizations.empty()) + computeOp.setPrivatizationsAttr( + mlir::ArrayAttr::get(builder.getContext(), privatizations)); + } + auto insPt = builder.saveInsertionPoint(); builder.setInsertionPointAfter(computeOp); diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp --- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp +++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp @@ -764,26 +764,62 @@ return yield; } -std::pair> -hlfir::genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents) { +mlir::Value hlfir::inlineElementalOp( + mlir::Location loc, fir::FirOpBuilder &builder, + hlfir::ElementalOp elemental, mlir::ValueRange oneBasedIndices, + mlir::IRMapping &mapper, + const std::function &mustRecursivelyInline) { + mlir::Region ®ion = 
elemental.getRegion(); + // hlfir.elemental region is a SizedRegion<1>. + assert(region.hasOneBlock() && "elemental region must have one block"); + mapper.map(elemental.getIndices(), oneBasedIndices); + mlir::Block::OpListType &ops = region.back().getOperations(); + assert(!ops.empty() && "elemental block cannot be empty"); + auto end = ops.end(); + for (auto opIt = ops.begin(); std::next(opIt) != end; ++opIt) { + if (auto apply = mlir::dyn_cast(*opIt)) + if (auto appliedElemental = + apply.getExpr().getDefiningOp()) + if (mustRecursivelyInline(appliedElemental)) { + llvm::SmallVector clonedApplyIndices; + for (auto indice : apply.getIndices()) + clonedApplyIndices.push_back(mapper.lookupOrDefault(indice)); + mlir::Value inlined = inlineElementalOp( + loc, builder, appliedElemental, clonedApplyIndices, mapper, + mustRecursivelyInline); + mapper.map(apply.getResult(), inlined); + continue; + } + (void)builder.clone(*opIt, mapper); + } + auto oldYield = mlir::dyn_cast_or_null( + region.back().getOperations().back()); + assert(oldYield && "must terminate with yieldElementalOp"); + return mapper.lookupOrDefault(oldYield.getElementValue()); +} + +hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc, + fir::FirOpBuilder &builder, + mlir::ValueRange extents) { + hlfir::LoopNest loopNest; assert(!extents.empty() && "must have at least one extent"); auto insPt = builder.saveInsertionPoint(); - llvm::SmallVector indices(extents.size()); + loopNest.oneBasedIndices.assign(extents.size(), mlir::Value{}); // Build loop nest from column to row. 
auto one = builder.create(loc, 1); mlir::Type indexType = builder.getIndexType(); unsigned dim = extents.size() - 1; - fir::DoLoopOp innerLoop; for (auto extent : llvm::reverse(extents)) { auto ub = builder.createConvert(loc, indexType, extent); - innerLoop = builder.create(loc, one, ub, one); - builder.setInsertionPointToStart(innerLoop.getBody()); + loopNest.innerLoop = builder.create(loc, one, ub, one); + builder.setInsertionPointToStart(loopNest.innerLoop.getBody()); // Reverse the indices so they are in column-major order. - indices[dim--] = innerLoop.getInductionVar(); + loopNest.oneBasedIndices[dim--] = loopNest.innerLoop.getInductionVar(); + if (!loopNest.outerLoop) + loopNest.outerLoop = loopNest.innerLoop; } builder.restoreInsertionPoint(insPt); - return {innerLoop, indices}; + return loopNest; } static fir::ExtendedValue diff --git a/flang/lib/Optimizer/Dialect/FIRDialect.cpp b/flang/lib/Optimizer/Dialect/FIRDialect.cpp --- a/flang/lib/Optimizer/Dialect/FIRDialect.cpp +++ b/flang/lib/Optimizer/Dialect/FIRDialect.cpp @@ -64,6 +64,7 @@ #define GET_OP_LIST #include "flang/Optimizer/Dialect/FIROps.cpp.inc" >(); + registerOpExternalInterfaces(); addInterfaces(); } diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp --- a/flang/lib/Optimizer/Dialect/FIROps.cpp +++ b/flang/lib/Optimizer/Dialect/FIROps.cpp @@ -12,6 +12,7 @@ #include "flang/Optimizer/Dialect/FIROps.h" #include "flang/Optimizer/Dialect/FIRAttr.h" +#include "flang/Optimizer/Dialect/FIRDialect.h" #include "flang/Optimizer/Dialect/FIROpsSupport.h" #include "flang/Optimizer/Dialect/FIRType.h" #include "flang/Optimizer/Dialect/Support/FIRContext.h" @@ -19,6 +20,7 @@ #include "flang/Optimizer/Support/Utils.h" #include "mlir/Dialect/CommonFolders.h" #include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/IR/Attributes.h" #include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/BuiltinOps.h" @@ -3758,6 +3760,17 @@ 
return fortranVar.verifyDeclareLikeOpImpl(getMemref()); } +//===----------------------------------------------------------------------===// +// FIROpsDialect +//===----------------------------------------------------------------------===// + +void fir::FIROpsDialect::registerOpExternalInterfaces() { + // Attach default declare target interfaces to operations which can be marked + // as declare target. + fir::GlobalOp::attachInterface< + mlir::omp::DeclareTargetDefaultModel>(*getContext()); +} + // Tablegen operators #define GET_OP_CLASSES diff --git a/flang/lib/Optimizer/Dialect/FIRType.cpp b/flang/lib/Optimizer/Dialect/FIRType.cpp --- a/flang/lib/Optimizer/Dialect/FIRType.cpp +++ b/flang/lib/Optimizer/Dialect/FIRType.cpp @@ -383,7 +383,7 @@ } /// Return the ISO_C_BINDING intrinsic module value of type \p ty. -int getTypeCode(mlir::Type ty, fir::KindMapping &kindMap) { +int getTypeCode(mlir::Type ty, const fir::KindMapping &kindMap) { unsigned width = 0; if (mlir::IntegerType intTy = ty.dyn_cast()) { switch (intTy.getWidth()) { @@ -473,6 +473,50 @@ llvm_unreachable("unsupported type"); } +std::string getTypeAsString(mlir::Type ty, const fir::KindMapping &kindMap, + llvm::StringRef prefix) { + std::stringstream name; + name << prefix.str(); + if (!prefix.empty()) + name << "_"; + ty = fir::unwrapRefType(ty); + while (ty) { + if (fir::isa_trivial(ty)) { + if (ty.isIntOrIndex()) { + name << 'i' << ty.getIntOrFloatBitWidth(); + } else if (ty.isa()) { + name << 'f' << ty.getIntOrFloatBitWidth(); + } else if (fir::isa_complex(ty)) { + name << 'z'; + if (auto cplxTy = mlir::dyn_cast_or_null(ty)) { + auto floatTy = cplxTy.getElementType().cast(); + name << floatTy.getWidth(); + } else if (auto cplxTy = mlir::dyn_cast_or_null(ty)) { + name << kindMap.getRealBitsize(cplxTy.getFKind()); + } + } else if (auto logTy = mlir::dyn_cast_or_null(ty)) { + name << 'l' << kindMap.getLogicalBitsize(logTy.getFKind()); + } else { + llvm::report_fatal_error("unsupported type"); + } + 
break; + } else if (auto charTy = mlir::dyn_cast_or_null(ty)) { + name << 'c' << kindMap.getCharacterBitsize(charTy.getFKind()); + if (charTy.getLen() != fir::CharacterType::singleton()) + name << "x" << charTy.getLen(); + break; + } else if (auto seqTy = mlir::dyn_cast_or_null(ty)) { + for (auto extent : seqTy.getShape()) + name << extent << 'x'; + ty = seqTy.getEleTy(); + } else { + // TODO: add support for RecordType/BaseBoxType + llvm::report_fatal_error("unsupported type"); + } + } + return name.str(); +} + } // namespace fir namespace { diff --git a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp --- a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp @@ -552,12 +552,11 @@ adaptor.getTypeparams()); // Generate a loop nest looping around the fir.elemental shape and clone // fir.elemental region inside the inner loop. - auto [innerLoop, oneBasedLoopIndices] = - hlfir::genLoopNest(loc, builder, extents); + hlfir::LoopNest loopNest = hlfir::genLoopNest(loc, builder, extents); auto insPt = builder.saveInsertionPoint(); - builder.setInsertionPointToStart(innerLoop.getBody()); - auto yield = - hlfir::inlineElementalOp(loc, builder, elemental, oneBasedLoopIndices); + builder.setInsertionPointToStart(loopNest.innerLoop.getBody()); + auto yield = hlfir::inlineElementalOp(loc, builder, elemental, + loopNest.oneBasedIndices); hlfir::Entity elementValue(yield.getElementValue()); // Skip final AsExpr if any. It would create an element temporary, // which is no needed since the element will be assigned right away in @@ -572,7 +571,7 @@ rewriter.eraseOp(yield); // Assign the element value to the temp element for this iteration. 
auto tempElement = - hlfir::getElementAt(loc, builder, temp, oneBasedLoopIndices); + hlfir::getElementAt(loc, builder, temp, loopNest.oneBasedIndices); builder.create(loc, elementValue, tempElement); // hlfir.yield_element implicitly marks the end-of-life its operand if // it is an expression created in the hlfir.elemental (since it is its diff --git a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp --- a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp @@ -19,11 +19,13 @@ #include "ScheduleOrderedAssignments.h" #include "flang/Optimizer/Builder/FIRBuilder.h" +#include "flang/Optimizer/Builder/HLFIRTools.h" #include "flang/Optimizer/Builder/Todo.h" #include "flang/Optimizer/Dialect/Support/FIRContext.h" #include "flang/Optimizer/HLFIR/Passes.h" #include "mlir/IR/IRMapping.h" #include "mlir/Transforms/DialectConversion.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/TypeSwitch.h" #include "llvm/Support/Debug.h" @@ -42,6 +44,52 @@ llvm::cl::desc("Only run ordered assignment scheduling with no codegen"), llvm::cl::init(false)); +namespace { + +/// Structure that represents a masked expression being lowered. Masked +/// expressions are any expressions inside an hlfir.where. As described in +/// Fortran 2018 section 10.2.3.2, the evaluation of the elemental parts of such +/// expressions must be masked, while the evaluation of none elemental parts +/// must not be masked. This structure analyzes the region evaluating the +/// expression and allows splitting the generation of the none elemental part +/// from the elemental part. +struct MaskedArrayExpr { + MaskedArrayExpr(mlir::Location loc, mlir::Region ®ion); + + /// Generate the none elemental part. Must be called outside of the + /// loops created for the WHERE construct. 
+ void generateNoneElementalPart(fir::FirOpBuilder &builder, + mlir::IRMapping &mapper); + + /// Methods below can only be called once generateNoneElementalPart has been + /// called. + + /// Return the shape of the expression. + mlir::Value generateShape(fir::FirOpBuilder &builder, + mlir::IRMapping &mapper); + /// Return the value of an element value for this expression given the current + /// where loop indices. + mlir::Value generateElementalParts(fir::FirOpBuilder &builder, + mlir::ValueRange oneBasedIndices, + mlir::IRMapping &mapper); + /// Generate the cleanup for the none elemental parts, if any. This must be + /// called after the loops created for the WHERE construct. + void generateNoneElementalCleanupIfAny(fir::FirOpBuilder &builder, + mlir::IRMapping &mapper); + + mlir::Location loc; + mlir::Region ®ion; + /// Was generateNoneElementalPart called? + bool noneElementalPartWasGenerated = false; + /// Set of operations that form the elemental parts of the + /// expression evaluation. These are the hlfir.elemental and + /// hlfir.elemental_addr that form the elemental tree producing + /// the expression value. hlfir.elemental that produce values + /// used inside transformational operations are not part of this set. + llvm::SmallSet elementalParts{}; +}; +} // namespace + namespace { /// Structure that visits an ordered assignment tree and generates code for /// it according to a schedule. @@ -76,6 +124,8 @@ /// Generate code when leaving a given ordered assignment node. void post(hlfir::ForallOp); void post(hlfir::ForallMaskOp); + void post(hlfir::WhereOp); + void post(hlfir::ElseWhereOp); /// Is this an assignment to a vector subscripted entity? static bool hasVectorSubscriptedLhs(hlfir::RegionAssignOp regionAssignOp); @@ -105,9 +155,23 @@ /// at the current insertion point (by cloning). void generateCleanupIfAny(std::optional maybeYield); + /// Generate a masked entity. 
This can only be called when whereLoopNest was + /// set (When an hlfir.where is being visited). + /// This method returns the scalar element (that may have been previously + /// saved) for the current indices inside the where loop. + mlir::Value generateMaskedEntity(mlir::Location loc, mlir::Region ®ion) { + MaskedArrayExpr maskedExpr(loc, region); + return generateMaskedEntity(maskedExpr); + } + mlir::Value generateMaskedEntity(MaskedArrayExpr &maskedExpr); + + /// Create a fir.if at the current position inside the where loop nest + /// given a mask expression. + void generateMaskIfOp(MaskedArrayExpr &mask); + fir::FirOpBuilder &builder; - /// Map containg the mapping between the original order assignment tree + /// Map containing the mapping between the original order assignment tree /// operations and the operations that have been cloned in the current run. /// It is reset between two runs. mlir::IRMapping mapper; @@ -115,6 +179,9 @@ /// point correctly when leaving a node that requires a fir.do_loop or fir.if /// operation. llvm::SmallVector constructStack; + /// Current where loop nest, if any. + std::optional whereLoopNest; + /// Root of the order assignment tree being lowered. hlfir::OrderedAssignmentTreeOpInterface root; /// Pointer to the current run of the schedule being lowered. 
@@ -139,8 +206,8 @@ mlir::dyn_cast(op)) walk(subNode); llvm::TypeSwitch(node.getOperation()) - .Case( - [&](auto concreteOp) { post(concreteOp); }) + .Case([&](auto concreteOp) { post(concreteOp); }) .Default([](auto) {}); } } @@ -218,19 +285,78 @@ generateCleanupIfAny(oldLhsYield); } +void OrderedAssignmentRewriter::generateMaskIfOp(MaskedArrayExpr &mask) { + assert(whereLoopNest.has_value() && "must be inside a WHERE"); + mlir::Location loc = mask.loc; + hlfir::Entity maskVal{generateMaskedEntity(mask)}; + maskVal = hlfir::loadTrivialScalar(loc, builder, maskVal); + mlir::Value cdt = builder.createConvert(loc, builder.getI1Type(), maskVal); + // Else region is added when visiting nested hlfir.elseWhereOp, if any. + auto ifOp = builder.create(loc, std::nullopt, cdt, + /*withElseRegion=*/false); + constructStack.push_back(ifOp.getOperation()); + builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); +} + void OrderedAssignmentRewriter::pre(hlfir::WhereOp whereOp) { mlir::Location loc = whereOp.getLoc(); - TODO(loc, "WHERE in HLFIR"); + MaskedArrayExpr mask(loc, whereOp.getMaskRegion()); + if (!whereLoopNest) { + // Start a loop nest iterating on the shape of the where mask. + mask.generateNoneElementalPart(builder, mapper); + mlir::Value shape = mask.generateShape(builder, mapper); + whereLoopNest = hlfir::genLoopNest(loc, builder, shape); + constructStack.push_back(whereLoopNest->outerLoop.getOperation()); + builder.setInsertionPointToStart(whereLoopNest->innerLoop.getBody()); + } + // Generate a fir.if with the value of the current element of the mask + // inside the loops. + generateMaskIfOp(mask); +} + +void OrderedAssignmentRewriter::post(hlfir::WhereOp whereOp) { + assert(!constructStack.empty() && "must contain a fir.if"); + builder.setInsertionPointAfter(constructStack.pop_back_val()); + // If all where/elsewhere fir.if have been popped, this is the outer whereOp, + // and the where loop must be exited. 
+ assert(!constructStack.empty() && "must contain a fir.do_loop or fir.if"); + if (mlir::isa(constructStack.back())) { + builder.setInsertionPointAfter(constructStack.pop_back_val()); + whereLoopNest.reset(); + } } void OrderedAssignmentRewriter::pre(hlfir::ElseWhereOp elseWhereOp) { + assert(!constructStack.empty() && "cannot be empty inside a where"); mlir::Location loc = elseWhereOp.getLoc(); - TODO(loc, "ELSEWHERE in HLFIR"); + // Create an "else" region for the current where/elsewhere fir.if. + auto ifOp = mlir::dyn_cast(constructStack.back()); + assert(ifOp && ifOp.getElseRegion().empty() && "must be an if without else"); + builder.createBlock(&ifOp.getElseRegion()); + auto end = builder.create(loc); + builder.setInsertionPoint(end); + if (elseWhereOp.getMaskRegion().empty()) + return; + // Create new nested fir.if with elsewhere mask if any. + MaskedArrayExpr mask(loc, elseWhereOp.getMaskRegion()); + generateMaskIfOp(mask); +} + +void OrderedAssignmentRewriter::post(hlfir::ElseWhereOp elseWhereOp) { + // Exit ifOp that was created for the elseWhereOp mask, if any. + if (elseWhereOp.getMaskRegion().empty()) + return; + assert(!constructStack.empty() && "must contain a fir.if"); + builder.setInsertionPointAfter(constructStack.pop_back_val()); } std::pair> OrderedAssignmentRewriter::generateYieldedEntity(mlir::Region ®ion) { // TODO: if the region was saved, use that instead of generating code again. + if (whereLoopNest.has_value()) { + mlir::Location loc = region.getParentOp()->getLoc(); + return {generateMaskedEntity(loc, region), std::nullopt}; + } assert(region.hasOneBlock() && "region must contain one block"); // Clone all operations except the final hlfir.yield. 
mlir::Block::OpListType &ops = region.back().getOperations(); @@ -258,6 +384,27 @@ return value; } +mlir::Value +OrderedAssignmentRewriter::generateMaskedEntity(MaskedArrayExpr &maskedExpr) { + assert(whereLoopNest.has_value() && "must be inside WHERE loop nest"); + auto insertionPoint = builder.saveInsertionPoint(); + if (!maskedExpr.noneElementalPartWasGenerated) { + // Generate none elemental part before the where loops (but inside the + // current forall loops if any). + builder.setInsertionPoint(whereLoopNest->outerLoop); + maskedExpr.generateNoneElementalPart(builder, mapper); + } + // Generate the none elemental part cleanup after the where loops. + builder.setInsertionPointAfter(whereLoopNest->outerLoop); + maskedExpr.generateNoneElementalCleanupIfAny(builder, mapper); + // Generate the value of the current element for the masked expression + // at the current insertion point (inside the where loops, and any fir.if + // generated for previous masks). + builder.restoreInsertionPoint(insertionPoint); + return maskedExpr.generateElementalParts( + builder, whereLoopNest->oneBasedIndices, mapper); +} + void OrderedAssignmentRewriter::generateCleanupIfAny( std::optional maybeYield) { if (maybeYield.has_value()) @@ -310,6 +457,127 @@ return false; } +/// Is the apply using all the elemental indices in order? +static bool isInOrderApply(hlfir::ApplyOp apply, hlfir::ElementalOp elemental) { + if (elemental.getIndices().size() != apply.getIndices().size()) + return false; + for (auto [elementalIdx, applyIdx] : + llvm::zip(elemental.getIndices(), apply.getIndices())) + if (elementalIdx != applyIdx) + return false; + return true; +} + +/// Gather the chain of hlfir::ElementalOp, if any, that produced \p value. 
+static void +gatherElementalTree(mlir::Value value, + llvm::SmallPtrSetImpl &elementalOps, + bool isOutOfOrder) { + if (auto elemental = value.getDefiningOp()) { + // Only inline an applied elemental that must be executed in order if the + // applying indices are in order. An hlfir::Elemental may have been created + // for a transformational like transpose, and Fortran 2018 standard + // section 10.2.3.2, point 10 imply that impure elemental sub-expression + // evaluations should not be masked if they are the arguments of + // transformational expressions. + if (isOutOfOrder && elemental.isOrdered()) + return; + elementalOps.insert(elemental.getOperation()); + for (mlir::Operation &op : elemental.getBody()->getOperations()) + if (auto apply = mlir::dyn_cast(op)) { + bool isUnorderedApply = + isOutOfOrder || !isInOrderApply(apply, elemental); + gatherElementalTree(apply.getExpr(), elementalOps, isUnorderedApply); + } + } +} + +MaskedArrayExpr::MaskedArrayExpr(mlir::Location loc, mlir::Region ®ion) + : loc{loc}, region{region} { + mlir::Operation &terminator = region.back().back(); + // TODO: clarify if vector subscripts must be inlined or not here. + // In case of x(elemental(A), :), this could lead to more elemental(A) + // evaluation than needed, which is not OK if "elemental" is impure. + // The standard is not very clear here. + if (mlir::isa(terminator)) + TODO(loc, "vector subscripted assignments inside WHERE"); + mlir::Value entity = mlir::cast(terminator).getEntity(); + gatherElementalTree(entity, elementalParts, /*isOutOfOrder=*/false); +} + +void MaskedArrayExpr::generateNoneElementalPart(fir::FirOpBuilder &builder, + mlir::IRMapping &mapper) { + assert(!noneElementalPartWasGenerated && + "none elemental parts already generated"); + // Clone all operations, except the elemental and the final yield. 
+ mlir::Block::OpListType &ops = region.back().getOperations(); + assert(!ops.empty() && "yield block cannot be empty"); + auto end = ops.end(); + for (auto opIt = ops.begin(); std::next(opIt) != end; ++opIt) + if (!elementalParts.contains(&*opIt)) + (void)builder.clone(*opIt, mapper); + noneElementalPartWasGenerated = true; +} + +mlir::Value MaskedArrayExpr::generateShape(fir::FirOpBuilder &builder, + mlir::IRMapping &mapper) { + assert(noneElementalPartWasGenerated && + "non elemental part must have been generated"); + mlir::Operation &terminator = region.back().back(); + // If the operation that produced the yielded entity is elemental, it was not + // cloned, but it holds a shape argument that was cloned. Return the cloned + // shape. + if (auto elementalAddrOp = mlir::dyn_cast(terminator)) + return mapper.lookupOrDefault(elementalAddrOp.getShape()); + mlir::Value entity = mlir::cast(terminator).getEntity(); + if (auto elemental = entity.getDefiningOp()) + return mapper.lookupOrDefault(elemental.getShape()); + // Otherwise, the whole entity was cloned, and the shape can be generated + // from it. 
+ hlfir::Entity clonedEntity{mapper.lookupOrDefault(entity)}; + return hlfir::genShape(loc, builder, hlfir::Entity{clonedEntity}); +} + +mlir::Value +MaskedArrayExpr::generateElementalParts(fir::FirOpBuilder &builder, + mlir::ValueRange oneBasedIndices, + mlir::IRMapping &mapper) { + assert(noneElementalPartWasGenerated && + "non elemental part must have been generated"); + mlir::Operation &terminator = region.back().back(); + if (mlir::isa(terminator)) + TODO(loc, "vector subscripted assignments inside WHERE"); + mlir::Value entity = mlir::cast(terminator).getEntity(); + auto elemental = entity.getDefiningOp(); + if (!elemental) { + hlfir::Entity clonedEntity{mapper.lookupOrDefault(entity)}; + return hlfir::getElementAt(loc, builder, clonedEntity, oneBasedIndices); + } + auto mustRecursivelyInline = + [&](hlfir::ElementalOp appliedElemental) -> bool { + return elementalParts.contains(appliedElemental.getOperation()); + }; + return inlineElementalOp(loc, builder, elemental, oneBasedIndices, mapper, + mustRecursivelyInline); +} + +void MaskedArrayExpr::generateNoneElementalCleanupIfAny( + fir::FirOpBuilder &builder, mlir::IRMapping &mapper) { + mlir::Operation &terminator = region.back().back(); + if (mlir::isa(terminator)) + TODO(loc, "vector subscripted assignments inside WHERE"); + auto yieldOp = mlir::cast(terminator); + if (yieldOp.getCleanup().empty()) + return; + for (mlir::Operation &op : yieldOp.getCleanup().getOps()) { + if (auto destroy = mlir::dyn_cast(op)) + if (elementalParts.contains(destroy.getExpr().getDefiningOp())) + continue; + if (!mlir::isa(op)) + (void)builder.clone(op, mapper); + } +} + /// Lower an ordered assignment tree to fir.do_loop and hlfir.assign given /// a schedule. 
static void lower(hlfir::OrderedAssignmentTreeOpInterface root, @@ -371,8 +639,9 @@ mlir::LogicalResult matchAndRewrite(hlfir::WhereOp whereOp, mlir::PatternRewriter &rewriter) const override { - TODO(whereOp.getLoc(), "WHERE construct or statement in HLFIR"); - return mlir::failure(); + auto root = mlir::cast( + whereOp.getOperation()); + return ::rewrite(root, tryFusingAssignments, rewriter); } const bool tryFusingAssignments; }; diff --git a/flang/lib/Optimizer/HLFIR/Transforms/ScheduleOrderedAssignments.cpp b/flang/lib/Optimizer/HLFIR/Transforms/ScheduleOrderedAssignments.cpp --- a/flang/lib/Optimizer/HLFIR/Transforms/ScheduleOrderedAssignments.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/ScheduleOrderedAssignments.cpp @@ -568,6 +568,12 @@ os << "rhs"; else if (&assign.getLhsRegion() == &yieldRegion) os << "lhs"; + } else if (auto where = mlir::dyn_cast(parent)) { + if (&where.getMaskRegion() == &yieldRegion) + os << "mask"; + } else if (auto elseWhereOp = mlir::dyn_cast(parent)) { + if (&elseWhereOp.getMaskRegion() == &yieldRegion) + os << "mask"; } else { os << "unknown"; } diff --git a/flang/lib/Parser/prescan.cpp b/flang/lib/Parser/prescan.cpp --- a/flang/lib/Parser/prescan.cpp +++ b/flang/lib/Parser/prescan.cpp @@ -777,8 +777,23 @@ return false; } +static bool IsAtProcess(const char *p) { + static const char pAtProc[]{"process"}; + for (std::size_t i{0}; i < sizeof pAtProc - 1; ++i) { + if (ToLowerCaseLetter(*++p) != pAtProc[i]) + return false; + } + return true; +} + bool Prescanner::IsFixedFormCommentLine(const char *start) const { const char *p{start}; + + // The @process directive must start in column 1. + if (*p == '@' && IsAtProcess(p)) { + return true; + } + if (IsFixedFormCommentChar(*p) || *p == '%' || // VAX %list, %eject, &c. ((*p == 'D' || *p == 'd') && !features_.IsEnabled(LanguageFeature::OldDebugLines))) { @@ -810,6 +825,8 @@ p = SkipWhiteSpaceAndCComments(p); if (*p == '!' 
|| *p == '\n') { return p; + } else if (*p == '@') { + return IsAtProcess(p) ? p : nullptr; } else { return nullptr; } diff --git a/flang/test/Fir/omp-declare-target-data.fir b/flang/test/Fir/omp-declare-target-data.fir new file mode 100644 --- /dev/null +++ b/flang/test/Fir/omp-declare-target-data.fir @@ -0,0 +1,78 @@ +// RUN: fir-opt --fir-to-llvm-ir %s | FileCheck %s + +module attributes {omp.is_device = #omp.isdevice} { + + // CHECK: llvm.mlir.global external @_QMtest_0Earray_1d(dense<[1, 2, 3]> : tensor<3xi32>) {{{.*}}omp.declare_target = #omp.declaretarget{{.*}}} : !llvm.array<3 x i32> + fir.global @_QMtest_0Earray_1d(dense<[1, 2, 3]> : tensor<3xi32>) {omp.declare_target = #omp.declaretarget} : !fir.array<3xi32> + + // CHECK: llvm.mlir.global external @_QMtest_0Earray_2d() {{{.*}}omp.declare_target = #omp.declaretarget{{.*}}} : !llvm.array<2 x array<2 x i32>> + fir.global @_QMtest_0Earray_2d {omp.declare_target = #omp.declaretarget} : !fir.array<2x2xi32> { + %0 = fir.undefined !fir.array<2x2xi32> + %c1_i32 = arith.constant 1 : i32 + %1 = fir.insert_value %0, %c1_i32, [0 : index, 0 : index] : (!fir.array<2x2xi32>, i32) -> !fir.array<2x2xi32> + %c2_i32 = arith.constant 2 : i32 + %2 = fir.insert_value %1, %c2_i32, [1 : index, 0 : index] : (!fir.array<2x2xi32>, i32) -> !fir.array<2x2xi32> + %c3_i32 = arith.constant 3 : i32 + %3 = fir.insert_value %2, %c3_i32, [0 : index, 1 : index] : (!fir.array<2x2xi32>, i32) -> !fir.array<2x2xi32> + %c4_i32 = arith.constant 4 : i32 + %4 = fir.insert_value %3, %c4_i32, [1 : index, 1 : index] : (!fir.array<2x2xi32>, i32) -> !fir.array<2x2xi32> + %c2 = arith.constant 2 : index + %c2_0 = arith.constant 2 : index + fir.has_value %4 : !fir.array<2x2xi32> + } + + // CHECK: llvm.mlir.global external @_QMtest_0Edata_extended_link_1() {{{.*}}omp.declare_target = #omp.declaretarget{{.*}}} : f32 + fir.global @_QMtest_0Edata_extended_link_1 {omp.declare_target = #omp.declaretarget} : f32 { + %cst = arith.constant 2.000000e+00 : f32 + 
fir.has_value %cst : f32 + } + + // CHECK: llvm.mlir.global external @_QMtest_0Edata_extended_link_2() {{{.*}}omp.declare_target = #omp.declaretarget{{.*}}} : f32 + fir.global @_QMtest_0Edata_extended_link_2 {omp.declare_target = #omp.declaretarget} : f32 { + %cst = arith.constant 3.000000e+00 : f32 + fir.has_value %cst : f32 + } + + // CHECK: llvm.mlir.global external @_QMtest_0Edata_extended_to_1() {{{.*}}omp.declare_target = #omp.declaretarget{{.*}}} : f32 + fir.global @_QMtest_0Edata_extended_to_1 {omp.declare_target = #omp.declaretarget} : f32 { + %cst = arith.constant 2.000000e+00 : f32 + fir.has_value %cst : f32 + } + + // CHECK: llvm.mlir.global external @_QMtest_0Edata_extended_to_2() {{{.*}}omp.declare_target = #omp.declaretarget{{.*}}} : f32 { + fir.global @_QMtest_0Edata_extended_to_2 {omp.declare_target = #omp.declaretarget} : f32 { + %cst = arith.constant 3.000000e+00 : f32 + fir.has_value %cst : f32 + } + + // CHECK: llvm.mlir.global external @_QMtest_0Edata_int() {{{.*}}omp.declare_target = #omp.declaretarget{{.*}}} : i32 + fir.global @_QMtest_0Edata_int {omp.declare_target = #omp.declaretarget} : i32 { + %c10_i32 = arith.constant 10 : i32 + fir.has_value %c10_i32 : i32 + } + + // CHECK: llvm.mlir.global external @_QMtest_0Edata_int_clauseless() {{{.*}}omp.declare_target = #omp.declaretarget{{.*}}} : i32 + fir.global @_QMtest_0Edata_int_clauseless {omp.declare_target = #omp.declaretarget} : i32 { + %c1_i32 = arith.constant 1 : i32 + fir.has_value %c1_i32 : i32 + } + + // CHECK: llvm.mlir.global external @_QMtest_0Edata_int_to() {{{.*}}omp.declare_target = #omp.declaretarget{{.*}}} : i32 + fir.global @_QMtest_0Edata_int_to {omp.declare_target = #omp.declaretarget} : i32 { + %c5_i32 = arith.constant 5 : i32 + fir.has_value %c5_i32 : i32 + } + + // CHECK: llvm.mlir.global external @_QMtest_0Ept1() {{{.*}}omp.declare_target = #omp.declaretarget{{.*}}} : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> { + fir.global @_QMtest_0Ept1 {omp.declare_target = 
#omp.declaretarget} : !fir.box> { + %0 = fir.zero_bits !fir.ptr + %1 = fir.embox %0 : (!fir.ptr) -> !fir.box> + fir.has_value %1 : !fir.box> + } + + // CHECK: llvm.mlir.global external @_QMtest_0Ept2_tar() {{{.*}}omp.declare_target = #omp.declaretarget{{.*}}} : i32 + fir.global @_QMtest_0Ept2_tar {omp.declare_target = #omp.declaretarget} target : i32 { + %c5_i32 = arith.constant 5 : i32 + fir.has_value %c5_i32 : i32 + } +} diff --git a/flang/test/HLFIR/order_assignments/where-codegen-no-conflict.fir b/flang/test/HLFIR/order_assignments/where-codegen-no-conflict.fir new file mode 100644 --- /dev/null +++ b/flang/test/HLFIR/order_assignments/where-codegen-no-conflict.fir @@ -0,0 +1,309 @@ +// Test code generation of hlfir.where, and hlfir.elsewhere when there +// is no need to create temporary storage. +// RUN: fir-opt %s --lower-hlfir-ordered-assignments | FileCheck %s + +func.func @test_simple(%arg0: !fir.box>, %arg1: !fir.box>>) { + %cst = arith.constant 4.200000e+01 : f32 + %0:2 = hlfir.declare %arg1 {uniq_name = "mask"} : (!fir.box>>) -> (!fir.box>>, !fir.box>>) + %1:2 = hlfir.declare %arg0 {uniq_name = "x"} : (!fir.box>) -> (!fir.box>, !fir.box>) + hlfir.where { + hlfir.yield %0#0 : !fir.box>> + } do { + hlfir.region_assign { + hlfir.yield %cst : f32 + } to { + hlfir.yield %1#0 : !fir.box> + } + } + return +} +// CHECK-LABEL: func.func @test_simple( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box>, +// CHECK-SAME: %[[VAL_1:.*]]: !fir.box>>) { +// CHECK: %[[VAL_2:.*]] = arith.constant 4.200000e+01 : f32 +// CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "mask"} : (!fir.box>>) -> (!fir.box>>, !fir.box>>) +// CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "x"} : (!fir.box>) -> (!fir.box>, !fir.box>) +// CHECK: %[[VAL_5:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_6:.*]]:3 = fir.box_dims %[[VAL_3]]#0, %[[VAL_5]] : (!fir.box>>, index) -> (index, index, index) +// CHECK: %[[VAL_7:.*]] = fir.shape %[[VAL_6]]#1 : (index) -> !fir.shape<1> 
+// CHECK: %[[VAL_8:.*]] = arith.constant 1 : index +// CHECK: fir.do_loop %[[VAL_9:.*]] = %[[VAL_8]] to %[[VAL_6]]#1 step %[[VAL_8]] { +// CHECK: %[[VAL_10:.*]] = hlfir.designate %[[VAL_3]]#0 (%[[VAL_9]]) : (!fir.box>>, index) -> !fir.ref> +// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_10]] : !fir.ref> +// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (!fir.logical<4>) -> i1 +// CHECK: fir.if %[[VAL_12]] { +// CHECK: %[[VAL_13:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_9]]) : (!fir.box>, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_2]] to %[[VAL_13]] : f32, !fir.ref +// CHECK: } +// CHECK: } +// CHECK: return +// CHECK: } + + +func.func @test_elsewhere(%arg0: !fir.ref>, %arg1: !fir.ref>, %arg2: !fir.ref>, %arg3: !fir.ref>>, %arg4: !fir.ref>> {fir.bindc_name = "mask2"}) { + %c100 = arith.constant 100 : index + %0 = fir.shape %c100 : (index) -> !fir.shape<1> + %1:2 = hlfir.declare %arg3(%0) {uniq_name = "mask"} : (!fir.ref>>, !fir.shape<1>) -> (!fir.ref>>, !fir.ref>>) + %2:2 = hlfir.declare %arg4(%0) {uniq_name = "mask2"} : (!fir.ref>>, !fir.shape<1>) -> (!fir.ref>>, !fir.ref>>) + %3:2 = hlfir.declare %arg0(%0) {uniq_name = "x"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) + %4:2 = hlfir.declare %arg1(%0) {uniq_name = "y"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) + %5:2 = hlfir.declare %arg2(%0) {uniq_name = "z"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) + hlfir.where { + hlfir.yield %1#0 : !fir.ref>> + } do { + hlfir.region_assign { + hlfir.yield %4#0 : !fir.ref> + } to { + hlfir.yield %3#0 : !fir.ref> + } + hlfir.elsewhere mask { + hlfir.yield %2#0 : !fir.ref>> + } do { + hlfir.region_assign { + hlfir.yield %3#0 : !fir.ref> + } to { + hlfir.yield %4#0 : !fir.ref> + } + hlfir.elsewhere do { + hlfir.region_assign { + hlfir.yield %4#0 : !fir.ref> + } to { + hlfir.yield %5#0 : !fir.ref> + } + } + } + } + return +} +// CHECK-LABEL: func.func @test_elsewhere( +// CHECK-SAME: %[[VAL_0:[^:]*]]: !fir.ref>, +// CHECK-SAME: 
%[[VAL_1:[^:]*]]: !fir.ref>, +// CHECK-SAME: %[[VAL_2:[^:]*]]: !fir.ref>, +// CHECK-SAME: %[[VAL_3:[^:]*]]: !fir.ref>>, +// CHECK-SAME: %[[VAL_4:[^:]*]]: !fir.ref>> {fir.bindc_name = "mask2"}) { +// CHECK: %[[VAL_5:.*]] = arith.constant 100 : index +// CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_3]](%[[VAL_6]]) {uniq_name = "mask"} : (!fir.ref>>, !fir.shape<1>) -> (!fir.ref>>, !fir.ref>>) +// CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_4]](%[[VAL_6]]) {uniq_name = "mask2"} : (!fir.ref>>, !fir.shape<1>) -> (!fir.ref>>, !fir.ref>>) +// CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_6]]) {uniq_name = "x"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) +// CHECK: %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_6]]) {uniq_name = "y"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) +// CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_2]](%[[VAL_6]]) {uniq_name = "z"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) +// CHECK: %[[VAL_12:.*]] = arith.constant 1 : index +// CHECK: fir.do_loop %[[VAL_13:.*]] = %[[VAL_12]] to %[[VAL_5]] step %[[VAL_12]] { +// CHECK: %[[VAL_14:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_13]]) : (!fir.ref>>, index) -> !fir.ref> +// CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_14]] : !fir.ref> +// CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (!fir.logical<4>) -> i1 +// CHECK: fir.if %[[VAL_16]] { +// CHECK: %[[VAL_17:.*]] = hlfir.designate %[[VAL_10]]#0 (%[[VAL_13]]) : (!fir.ref>, index) -> !fir.ref +// CHECK: %[[VAL_18:.*]] = hlfir.designate %[[VAL_9]]#0 (%[[VAL_13]]) : (!fir.ref>, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_17]] to %[[VAL_18]] : !fir.ref, !fir.ref +// CHECK: } +// CHECK: } +// CHECK: %[[VAL_19:.*]] = arith.constant 1 : index +// CHECK: fir.do_loop %[[VAL_20:.*]] = %[[VAL_19]] to %[[VAL_5]] step %[[VAL_19]] { +// CHECK: %[[VAL_21:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_20]]) : (!fir.ref>>, index) -> !fir.ref> 
+// CHECK: %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref> +// CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1 +// CHECK: fir.if %[[VAL_23]] { +// CHECK: } else { +// CHECK: %[[VAL_24:.*]] = hlfir.designate %[[VAL_8]]#0 (%[[VAL_20]]) : (!fir.ref>>, index) -> !fir.ref> +// CHECK: %[[VAL_25:.*]] = fir.load %[[VAL_24]] : !fir.ref> +// CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (!fir.logical<4>) -> i1 +// CHECK: fir.if %[[VAL_26]] { +// CHECK: %[[VAL_27:.*]] = hlfir.designate %[[VAL_9]]#0 (%[[VAL_20]]) : (!fir.ref>, index) -> !fir.ref +// CHECK: %[[VAL_28:.*]] = hlfir.designate %[[VAL_10]]#0 (%[[VAL_20]]) : (!fir.ref>, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_27]] to %[[VAL_28]] : !fir.ref, !fir.ref +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: %[[VAL_29:.*]] = arith.constant 1 : index +// CHECK: fir.do_loop %[[VAL_30:.*]] = %[[VAL_29]] to %[[VAL_5]] step %[[VAL_29]] { +// CHECK: %[[VAL_31:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_30]]) : (!fir.ref>>, index) -> !fir.ref> +// CHECK: %[[VAL_32:.*]] = fir.load %[[VAL_31]] : !fir.ref> +// CHECK: %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (!fir.logical<4>) -> i1 +// CHECK: fir.if %[[VAL_33]] { +// CHECK: } else { +// CHECK: %[[VAL_34:.*]] = hlfir.designate %[[VAL_8]]#0 (%[[VAL_30]]) : (!fir.ref>>, index) -> !fir.ref> +// CHECK: %[[VAL_35:.*]] = fir.load %[[VAL_34]] : !fir.ref> +// CHECK: %[[VAL_36:.*]] = fir.convert %[[VAL_35]] : (!fir.logical<4>) -> i1 +// CHECK: fir.if %[[VAL_36]] { +// CHECK: } else { +// CHECK: %[[VAL_37:.*]] = hlfir.designate %[[VAL_10]]#0 (%[[VAL_30]]) : (!fir.ref>, index) -> !fir.ref +// CHECK: %[[VAL_38:.*]] = hlfir.designate %[[VAL_11]]#0 (%[[VAL_30]]) : (!fir.ref>, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_37]] to %[[VAL_38]] : !fir.ref, !fir.ref +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: return +// CHECK: } + +func.func @expr_tree(%arg0: !fir.box>, %arg1: !fir.box>, %arg2: !fir.box>>) { + %cst = arith.constant 0.000000e+00 : f32 + 
%c-1 = arith.constant -1 : index + %c1 = arith.constant 1 : index + %c10 = arith.constant 10 : index + %0:2 = hlfir.declare %arg2 {uniq_name = "mask"} : (!fir.box>>) -> (!fir.box>>, !fir.box>>) + %1:2 = hlfir.declare %arg0 {uniq_name = "x"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %2:2 = hlfir.declare %arg1 {uniq_name = "y"} : (!fir.box>) -> (!fir.box>, !fir.box>) + hlfir.where { + %3 = fir.shape %c10 : (index) -> !fir.shape<1> + %4 = hlfir.designate %2#0 (%c10:%c1:%c-1) shape %3 : (!fir.box>, index, index, index, !fir.shape<1>) -> !fir.box> + %5 = hlfir.elemental %3 : (!fir.shape<1>) -> !hlfir.expr<10xf32> { + ^bb0(%arg3: index): + %9 = hlfir.designate %4 (%arg3) : (!fir.box>, index) -> !fir.ref + %10 = fir.load %9 : !fir.ref + %11 = math.absf %10 fastmath : f32 + hlfir.yield_element %11 : f32 + } + %6 = hlfir.elemental %3 : (!fir.shape<1>) -> !hlfir.expr<10x!fir.logical<4>> { + ^bb0(%arg3: index): + %9 = hlfir.apply %5, %arg3 : (!hlfir.expr<10xf32>, index) -> f32 + %10 = arith.cmpf ogt, %9, %cst : f32 + %11 = fir.convert %10 : (i1) -> !fir.logical<4> + hlfir.yield_element %11 : !fir.logical<4> + } + %7 = hlfir.elemental %3 : (!fir.shape<1>) -> !hlfir.expr<10x!fir.logical<4>> { + ^bb0(%arg3: index): + %9 = hlfir.apply %6, %arg3 : (!hlfir.expr<10x!fir.logical<4>>, index) -> !fir.logical<4> + %10 = hlfir.no_reassoc %9 : !fir.logical<4> + hlfir.yield_element %10 : !fir.logical<4> + } + %8 = hlfir.elemental %3 : (!fir.shape<1>) -> !hlfir.expr<10x!fir.logical<4>> { + ^bb0(%arg3: index): + %9 = hlfir.apply %7, %arg3 : (!hlfir.expr<10x!fir.logical<4>>, index) -> !fir.logical<4> + %10 = hlfir.designate %0#0 (%arg3) : (!fir.box>>, index) -> !fir.ref> + %11 = fir.load %10 : !fir.ref> + %12 = fir.convert %9 : (!fir.logical<4>) -> i1 + %13 = fir.convert %11 : (!fir.logical<4>) -> i1 + %14 = arith.andi %12, %13 : i1 + %15 = fir.convert %14 : (i1) -> !fir.logical<4> + hlfir.yield_element %15 : !fir.logical<4> + } + hlfir.yield %8 : !hlfir.expr<10x!fir.logical<4>> cleanup { + 
hlfir.destroy %8 : !hlfir.expr<10x!fir.logical<4>> + hlfir.destroy %7 : !hlfir.expr<10x!fir.logical<4>> + hlfir.destroy %6 : !hlfir.expr<10x!fir.logical<4>> + hlfir.destroy %5 : !hlfir.expr<10xf32> + } + } do { + hlfir.region_assign { + hlfir.yield %2#0 : !fir.box> + } to { + hlfir.yield %1#0 : !fir.box> + } + } + return +} +// CHECK-LABEL: func.func @expr_tree( +// CHECK-SAME: %[[VAL_0:[^:]*]]: !fir.box>, +// CHECK-SAME: %[[VAL_1:[^:]*]]: !fir.box>, +// CHECK-SAME: %[[VAL_2:.*]]: !fir.box>>) { +// CHECK: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VAL_4:.*]] = arith.constant -1 : index +// CHECK: %[[VAL_5:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_6:.*]] = arith.constant 10 : index +// CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "mask"} : (!fir.box>>) -> (!fir.box>>, !fir.box>>) +// CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "x"} : (!fir.box>) -> (!fir.box>, !fir.box>) +// CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "y"} : (!fir.box>) -> (!fir.box>, !fir.box>) +// CHECK: %[[VAL_10:.*]] = fir.shape %[[VAL_6]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_11:.*]] = hlfir.designate %[[VAL_9]]#0 (%[[VAL_6]]:%[[VAL_5]]:%[[VAL_4]]) shape %[[VAL_10]] : (!fir.box>, index, index, index, !fir.shape<1>) -> !fir.box> +// CHECK: %[[VAL_12:.*]] = arith.constant 1 : index +// CHECK: fir.do_loop %[[VAL_13:.*]] = %[[VAL_12]] to %[[VAL_6]] step %[[VAL_12]] { +// CHECK: %[[VAL_14:.*]] = hlfir.designate %[[VAL_11]] (%[[VAL_13]]) : (!fir.box>, index) -> !fir.ref +// CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_14]] : !fir.ref +// CHECK: %[[VAL_16:.*]] = math.absf %[[VAL_15]] fastmath : f32 +// CHECK: %[[VAL_17:.*]] = arith.cmpf ogt, %[[VAL_16]], %[[VAL_3]] : f32 +// CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_17]] : (i1) -> !fir.logical<4> +// CHECK: %[[VAL_19:.*]] = hlfir.no_reassoc %[[VAL_18]] : !fir.logical<4> +// CHECK: %[[VAL_20:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_13]]) : (!fir.box>>, index) -> 
!fir.ref> +// CHECK: %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref> +// CHECK: %[[VAL_22:.*]] = fir.convert %[[VAL_19]] : (!fir.logical<4>) -> i1 +// CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_21]] : (!fir.logical<4>) -> i1 +// CHECK: %[[VAL_24:.*]] = arith.andi %[[VAL_22]], %[[VAL_23]] : i1 +// CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (i1) -> !fir.logical<4> +// CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (!fir.logical<4>) -> i1 +// CHECK: fir.if %[[VAL_26]] { +// CHECK: %[[VAL_27:.*]] = hlfir.designate %[[VAL_9]]#0 (%[[VAL_13]]) : (!fir.box>, index) -> !fir.ref +// CHECK: %[[VAL_28:.*]] = hlfir.designate %[[VAL_8]]#0 (%[[VAL_13]]) : (!fir.box>, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_27]] to %[[VAL_28]] : !fir.ref, !fir.ref +// CHECK: } +// CHECK: } +// CHECK: return +// CHECK: } + +func.func @inside_forall(%arg0: !fir.ref>, %arg1: !fir.ref>) { + %c1 = arith.constant 1 : index + %cst = arith.constant 0.000000e+00 : f32 + %c10_i32 = arith.constant 10 : i32 + %c1_i32 = arith.constant 1 : i32 + %c10 = arith.constant 10 : index + %c20 = arith.constant 20 : index + %0 = fir.shape %c10, %c20 : (index, index) -> !fir.shape<2> + %1:2 = hlfir.declare %arg0(%0) {uniq_name = "x"} : (!fir.ref>, !fir.shape<2>) -> (!fir.ref>, !fir.ref>) + %2 = fir.shape %c20 : (index) -> !fir.shape<1> + %3:2 = hlfir.declare %arg1(%2) {uniq_name = "y"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) + hlfir.forall lb { + hlfir.yield %c1_i32 : i32 + } ub { + hlfir.yield %c10_i32 : i32 + } (%arg2: i32) { + hlfir.where { + %4 = hlfir.elemental %2 : (!fir.shape<1>) -> !hlfir.expr<20x!fir.logical<4>> { + ^bb0(%arg3: index): + %5 = hlfir.designate %3#0 (%arg3) : (!fir.ref>, index) -> !fir.ref + %6 = fir.load %5 : !fir.ref + %7 = arith.cmpf ogt, %6, %cst : f32 + %8 = fir.convert %7 : (i1) -> !fir.logical<4> + hlfir.yield_element %8 : !fir.logical<4> + } + hlfir.yield %4 : !hlfir.expr<20x!fir.logical<4>> cleanup { + hlfir.destroy %4 : !hlfir.expr<20x!fir.logical<4>> + 
} + } do { + hlfir.region_assign { + hlfir.yield %3#0 : !fir.ref> + } to { + %4 = fir.convert %arg2 : (i32) -> i64 + %5 = hlfir.designate %1#0 (%4, %c1:%c20:%c1) shape %2 : (!fir.ref>, i64, index, index, index, !fir.shape<1>) -> !fir.box> + hlfir.yield %5 : !fir.box> + } + } + } + return +} +// CHECK-LABEL: func.func @inside_forall( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>, +// CHECK-SAME: %[[VAL_1:.*]]: !fir.ref>) { +// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VAL_4:.*]] = arith.constant 10 : i32 +// CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 +// CHECK: %[[VAL_6:.*]] = arith.constant 10 : index +// CHECK: %[[VAL_7:.*]] = arith.constant 20 : index +// CHECK: %[[VAL_8:.*]] = fir.shape %[[VAL_6]], %[[VAL_7]] : (index, index) -> !fir.shape<2> +// CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_8]]) {uniq_name = "x"} : (!fir.ref>, !fir.shape<2>) -> (!fir.ref>, !fir.ref>) +// CHECK: %[[VAL_10:.*]] = fir.shape %[[VAL_7]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_10]]) {uniq_name = "y"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) +// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_5]] : (i32) -> index +// CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_4]] : (i32) -> index +// CHECK: %[[VAL_14:.*]] = arith.constant 1 : index +// CHECK: fir.do_loop %[[VAL_15:.*]] = %[[VAL_12]] to %[[VAL_13]] step %[[VAL_14]] { +// CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (index) -> i32 +// CHECK: %[[VAL_17:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_16]] : (i32) -> i64 +// CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_9]]#0 (%[[VAL_18]], %[[VAL_2]]:%[[VAL_7]]:%[[VAL_2]]) shape %[[VAL_10]] : (!fir.ref>, i64, index, index, index, !fir.shape<1>) -> !fir.box> +// CHECK: fir.do_loop %[[VAL_20:.*]] = %[[VAL_17]] to %[[VAL_7]] step %[[VAL_17]] { +// CHECK: %[[VAL_21:.*]] = hlfir.designate %[[VAL_11]]#0 
(%[[VAL_20]]) : (!fir.ref>, index) -> !fir.ref +// CHECK: %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref +// CHECK: %[[VAL_23:.*]] = arith.cmpf ogt, %[[VAL_22]], %[[VAL_3]] : f32 +// CHECK: %[[VAL_24:.*]] = fir.convert %[[VAL_23]] : (i1) -> !fir.logical<4> +// CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (!fir.logical<4>) -> i1 +// CHECK: fir.if %[[VAL_25]] { +// CHECK: %[[VAL_26:.*]] = hlfir.designate %[[VAL_11]]#0 (%[[VAL_20]]) : (!fir.ref>, index) -> !fir.ref +// CHECK: %[[VAL_27:.*]] = hlfir.designate %[[VAL_19]] (%[[VAL_20]]) : (!fir.box>, index) -> !fir.ref +// CHECK: hlfir.assign %[[VAL_26]] to %[[VAL_27]] : !fir.ref, !fir.ref +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: return +// CHECK: } diff --git a/flang/test/HLFIR/order_assignments/where-fusing-scheduling.f90 b/flang/test/HLFIR/order_assignments/where-fusing-scheduling.f90 new file mode 100644 --- /dev/null +++ b/flang/test/HLFIR/order_assignments/where-fusing-scheduling.f90 @@ -0,0 +1,41 @@ +! Test scheduling of WHERE in lower-hlfir-ordered-assignments pass +! when fusing is enabled or disabled. + +!RUN: bbc -hlfir -o - -pass-pipeline="builtin.module(lower-hlfir-ordered-assignments{fuse-assignments=false})" --debug-only=flang-ordered-assignment -flang-dbg-order-assignment-schedule-only %s 2>&1 | FileCheck %s --check-prefix NOFUSE + +!RUN: bbc -hlfir -o - -pass-pipeline="builtin.module(lower-hlfir-ordered-assignments{fuse-assignments=true})" --debug-only=flang-ordered-assignment -flang-dbg-order-assignment-schedule-only %s 2>&1 | FileCheck %s --check-prefix FUSE + +!REQUIRES: asserts + +subroutine fusable(x, y, mask) + real :: x(:), y(:) + logical :: mask(:) + where (mask) + x = 41. + y = 42. 
+ end where +end subroutine + +subroutine unfusable(x, y, mask) + real :: x(:), y(:) + logical :: mask(:) + where (mask) + x(1:10) = y + y = x(10:1:-1) + end where +end subroutine + +!NOFUSE-LABEL: ------------ scheduling where in _QPfusable ------------ +!NOFUSE-NEXT: run 1 evaluate: where/region_assign1 +!NOFUSE-NEXT: run 2 evaluate: where/region_assign2 +!NOFUSE-LABEL: ------------ scheduling where in _QPunfusable ------------ +!NOFUSE-NEXT: run 1 evaluate: where/region_assign1 +!NOFUSE-NEXT: run 2 evaluate: where/region_assign2 + +!FUSE-LABEL: ------------ scheduling where in _QPfusable ------------ +!FUSE-NEXT: run 1 evaluate: where/region_assign1 +!FUSE-NEXT: run 1 evaluate: where/region_assign2 +!FUSE-LABEL: ------------ scheduling where in _QPunfusable ------------ +!FUSE-NEXT: run 1 evaluate: where/region_assign1 +!FUSE-NEXT: conflict: R/W: of type '!fir.box>' at index: 1 W: of type '!fir.box>' at index: 1 +!FUSE-NEXT: run 2 evaluate: where/region_assign2 diff --git a/flang/test/HLFIR/order_assignments/where-scheduling.f90 b/flang/test/HLFIR/order_assignments/where-scheduling.f90 new file mode 100644 --- /dev/null +++ b/flang/test/HLFIR/order_assignments/where-scheduling.f90 @@ -0,0 +1,128 @@ +! Test scheduling of WHERE in lower-hlfir-ordered-assignments pass. + +! RUN: bbc -hlfir -o - -pass-pipeline="builtin.module(lower-hlfir-ordered-assignments)" --debug-only=flang-ordered-assignment -flang-dbg-order-assignment-schedule-only %s 2>&1 | FileCheck %s +! REQUIRES: asserts + +subroutine no_conflict(x, y) + real :: x(:), y(:) + where (y.gt.0) x = y +end subroutine + +subroutine fake_conflict(x, y) + ! The conflict here could be avoided because the read and write are + ! aligned, so there would not be any read after write at the element + ! level, but this will require a bit more work to detect this (like + ! comparing the hlfir.designate operations). 
+ real :: x(:), y(:) + where (x.gt.y) x = y +end subroutine + +subroutine only_once(x, y, z) + interface + impure function call_me_only_once() + logical :: call_me_only_once(10) + end function + end interface + real :: x(:), y(:), z(:) + where (call_me_only_once()) + x = y + z = y + end where +end subroutine + +subroutine rhs_lhs_conflict(x, y) + real :: x(:, :), y(:, :) + where (y.gt.0.) x = transpose(x) +end subroutine + +subroutine where_construct_no_conflict(x, y, z, mask1, mask2) + real :: x(:), y(:), z(:) + logical :: mask1(:), mask2(:) + where (mask1) + x = y + elsewhere (mask2) + z = y + end where +end subroutine + +subroutine where_construct_conflict(x, y) + real :: x(:, :), y(:, :) + where (y.gt.0.) + x = y + elsewhere (x.gt.0) + y = x + end where +end subroutine + +subroutine where_construct_conflict_2(x, y) + real :: x(:, :), y(:, :) + where (x.gt.0.) + x = y + elsewhere (y.gt.0) + y = x + end where +end subroutine + +subroutine where_vector_subscript_conflict_1(x, vec1) + real :: x(10) + integer :: vec1(10) + where (x(vec1).lt.0.) x = 42. +end subroutine + +subroutine where_vector_subscript_conflict_2(x, vec1) + integer :: x(10) + real :: y(10) + where (y(x).lt.0.) 
x = 0 +end subroutine + +subroutine where_in_forall_conflict(x) + real :: x(:, :) + forall (i = 1:10) + where (x(i, :).gt.0) x(:, i) = x(i, :) + end forall +end subroutine + +!CHECK-LABEL: ------------ scheduling where in _QPno_conflict ------------ +!CHECK-NEXT: run 1 evaluate: where/region_assign1 +!CHECK-LABEL: ------------ scheduling where in _QPfake_conflict ------------ +!CHECK-NEXT: conflict: R/W: of type '!fir.box>' at index: 0 W: of type '!fir.box>' at index: 0 +!CHECK-NEXT: run 1 save : where/mask +!CHECK-NEXT: run 2 evaluate: where/region_assign1 +!CHECK-LABEL: ------------ scheduling where in _QPonly_once ------------ +!CHECK-NEXT: unknown effect: %9 = fir.call @llvm.stacksave() fastmath : () -> !fir.ref +!CHECK-NEXT: run 1 save (w): where/mask +!CHECK-NEXT: run 2 evaluate: where/region_assign1 +!CHECK-NEXT: run 3 evaluate: where/region_assign2 +!CHECK-LABEL: ------------ scheduling where in _QPrhs_lhs_conflict ------------ +!CHECK-NEXT: unknown effect: %2 = hlfir.transpose %0#0 : (!fir.box>) -> !hlfir.expr +!CHECK-NEXT: run 1 save (w): where/region_assign1/rhs +!CHECK-NEXT: run 2 evaluate: where/region_assign1 +!CHECK-LABEL: ------------ scheduling where in _QPwhere_construct_no_conflict ------------ +!CHECK-NEXT: run 1 evaluate: where/region_assign1 +!CHECK-NEXT: run 2 evaluate: where/elsewhere1/region_assign1 +!CHECK-LABEL: ------------ scheduling where in _QPwhere_construct_conflict ------------ +!CHECK-NEXT: run 1 evaluate: where/region_assign1 +!CHECK-NEXT: conflict: R/W: of type '!fir.box>' at index: 1 W: of type '!fir.box>' at index: 1 +!CHECK-NEXT: run 2 save : where/mask +!CHECK-NEXT: run 3 evaluate: where/elsewhere1/region_assign1 +!CHECK-LABEL: ------------ scheduling where in _QPwhere_construct_conflict_2 ------------ +!CHECK-NEXT: conflict: R/W: of type '!fir.box>' at index: 0 W: of type '!fir.box>' at index: 0 +!CHECK-NEXT: run 1 save : where/mask +!CHECK-NEXT: run 2 evaluate: where/region_assign1 +!CHECK-NEXT: conflict: R/W: of type 
'!fir.box>' at index: 1 W: of type '!fir.box>' at index: 1 +!CHECK-NEXT: run 3 save : where/elsewhere1/mask +!CHECK-NEXT: run 4 evaluate: where/elsewhere1/region_assign1 +!CHECK-LABEL: ------------ scheduling where in _QPwhere_vector_subscript_conflict_1 ------------ +!CHECK-NEXT: conflict: R/W: of type '!fir.ref>' at index: 0 W: of type '!fir.ref>' at index: 0 +!CHECK-NEXT: run 1 save : where/mask +!CHECK-NEXT: run 2 evaluate: where/region_assign1 +!CHECK-LABEL: ------------ scheduling where in _QPwhere_vector_subscript_conflict_2 ------------ +!CHECK-NEXT: conflict: R/W: of type '!fir.ref>' at index: 0 W: of type '!fir.ref>' at index: 0 +!CHECK-NEXT: run 1 save : where/mask +!CHECK-NEXT: run 2 evaluate: where/region_assign1 +!CHECK-LABEL: ------------ scheduling forall in _QPwhere_in_forall_conflict ------------ +!CHECK-NEXT: conflict: R/W: of type '!fir.box>' at index: 0 W: of type '!fir.box>' at index: 0 +!CHECK-NEXT: run 1 save : forall/where1/mask +!CHECK-NEXT: conflict: R/W: of type '!fir.box>' at index: 0 W: of type '!fir.box>' at index: 0 +!CHECK-NEXT: run 1 save : forall/where1/region_assign1/rhs +!CHECK-NEXT: run 2 evaluate: forall/where1/region_assign1 diff --git a/flang/test/Lower/OpenACC/acc-loop.f90 b/flang/test/Lower/OpenACC/acc-loop.f90 --- a/flang/test/Lower/OpenACC/acc-loop.f90 +++ b/flang/test/Lower/OpenACC/acc-loop.f90 @@ -2,6 +2,11 @@ ! RUN: bbc -fopenacc -emit-fir %s -o - | FileCheck %s +! CHECK-LABEL: acc.private.recipe @privatization_10x10xf32 : !fir.ref> init { +! CHECK: ^bb0(%{{.*}}: !fir.ref>): +! CHECK: acc.yield %{{.*}} : !fir.ref> +! 
CHECK: } + program acc_loop integer :: i, j @@ -154,7 +159,7 @@ a(i) = b(i) END DO -!CHECK: acc.loop private(%{{.*}} : !fir.ref>) { +!CHECK: acc.loop private(@privatization_10x10xf32 -> %{{.*}} : !fir.ref>) { !CHECK: fir.do_loop !CHECK: acc.yield !CHECK-NEXT: }{{$}} @@ -164,7 +169,7 @@ a(i) = b(i) END DO -!CHECK: acc.loop private(%{{.*}}, %{{.*}} : !fir.ref>, !fir.ref>) { +!CHECK: acc.loop private(@privatization_10x10xf32 -> %{{.*}} : !fir.ref>, @privatization_10x10xf32 -> %{{.*}} : !fir.ref>) { !CHECK: fir.do_loop !CHECK: acc.yield !CHECK-NEXT: }{{$}} @@ -174,7 +179,7 @@ a(i) = b(i) END DO -!CHECK: acc.loop private(%{{.*}}, %{{.*}} : !fir.ref>, !fir.ref>) { +!CHECK: acc.loop private(@privatization_10x10xf32 -> %{{.*}} : !fir.ref>, @privatization_10x10xf32 -> %{{.*}} : !fir.ref>) { !CHECK: fir.do_loop !CHECK: acc.yield !CHECK-NEXT: }{{$}} diff --git a/flang/test/Lower/OpenACC/acc-parallel-loop.f90 b/flang/test/Lower/OpenACC/acc-parallel-loop.f90 --- a/flang/test/Lower/OpenACC/acc-parallel-loop.f90 +++ b/flang/test/Lower/OpenACC/acc-parallel-loop.f90 @@ -2,6 +2,13 @@ ! RUN: bbc -fopenacc -emit-fir %s -o - | FileCheck %s +! CHECK-LABEL: acc.private.recipe @privatization_10xf32 : !fir.ref> init { +! CHECK: ^bb0(%{{.*}}: !fir.ref>): +! CHECK: acc.yield %{{.*}} : !fir.ref> +! CHECK: } + +! CHECK-LABEL: func.func @_QPacc_parallel_loop() + subroutine acc_parallel_loop integer :: i, j @@ -447,8 +454,8 @@ a(i) = b(i) END DO -! CHECK: acc.parallel firstprivate(%[[B]] : !fir.ref>) private(%[[A]] : !fir.ref>) { -! CHECK: acc.loop private(%[[A]] : !fir.ref>) { +! CHECK: acc.parallel firstprivate(%[[B]] : !fir.ref>) private(@privatization_10xf32 -> %[[A]] : !fir.ref>) { +! CHECK: acc.loop private(@privatization_10xf32 -> %[[A]] : !fir.ref>) { ! CHECK: fir.do_loop ! CHECK: acc.yield ! 
CHECK-NEXT: }{{$}} diff --git a/flang/test/Lower/OpenACC/acc-parallel.f90 b/flang/test/Lower/OpenACC/acc-parallel.f90 --- a/flang/test/Lower/OpenACC/acc-parallel.f90 +++ b/flang/test/Lower/OpenACC/acc-parallel.f90 @@ -2,6 +2,13 @@ ! RUN: bbc -fopenacc -emit-fir %s -o - | FileCheck %s +! CHECK-LABEL: acc.private.recipe @privatization_10x10xf32 : !fir.ref> init { +! CHECK: ^bb0(%{{.*}}: !fir.ref>): +! CHECK: acc.yield %{{.*}} : !fir.ref> +! CHECK: } + +! CHECK-LABEL: func.func @_QPacc_parallel() + subroutine acc_parallel integer :: i, j @@ -288,11 +295,11 @@ !CHECK: acc.detach accPtr(%[[ATTACH_D]] : !fir.ptr) {dataClause = 10 : i64, name = "d"} !CHECK: acc.detach accPtr(%[[ATTACH_E]] : !fir.ptr) {dataClause = 10 : i64, name = "e"} - !$acc parallel private(a) firstprivate(b) private(c) - !$acc end parallel +!$acc parallel private(a) firstprivate(b) private(c) +!$acc end parallel -!CHECK: acc.parallel firstprivate(%[[B]] : !fir.ref>) private(%[[A]], %[[C]] : !fir.ref>, !fir.ref>) { -!CHECK: acc.yield -!CHECK-NEXT: }{{$}} +! CHECK: acc.parallel firstprivate(%[[B]] : !fir.ref>) private(@privatization_10x10xf32 -> %[[A]] : !fir.ref>, @privatization_10x10xf32 -> %[[C]] : !fir.ref>) { +! CHECK: acc.yield +! CHECK-NEXT: }{{$}} end subroutine acc_parallel diff --git a/flang/test/Lower/OpenACC/acc-serial-loop.f90 b/flang/test/Lower/OpenACC/acc-serial-loop.f90 --- a/flang/test/Lower/OpenACC/acc-serial-loop.f90 +++ b/flang/test/Lower/OpenACC/acc-serial-loop.f90 @@ -2,6 +2,13 @@ ! RUN: bbc -fopenacc -emit-fir %s -o - | FileCheck %s +! CHECK-LABEL: acc.private.recipe @privatization_10xf32 : !fir.ref> init { +! CHECK: ^bb0(%{{.*}}: !fir.ref>): +! CHECK: acc.yield %{{.*}} : !fir.ref> +! CHECK: } + +! CHECK-LABEL: func.func @_QPacc_serial_loop() + subroutine acc_serial_loop integer :: i, j @@ -363,8 +370,8 @@ a(i) = b(i) END DO -! CHECK: acc.serial firstprivate(%[[B]] : !fir.ref>) private(%[[A]] : !fir.ref>) { -! CHECK: acc.loop private(%[[A]] : !fir.ref>) { +! 
CHECK: acc.serial firstprivate(%[[B]] : !fir.ref>) private(@privatization_10xf32 -> %[[A]] : !fir.ref>) { +! CHECK: acc.loop private(@privatization_10xf32 -> %[[A]] : !fir.ref>) { ! CHECK: fir.do_loop ! CHECK: acc.yield ! CHECK-NEXT: }{{$}} diff --git a/flang/test/Lower/OpenACC/acc-serial.f90 b/flang/test/Lower/OpenACC/acc-serial.f90 --- a/flang/test/Lower/OpenACC/acc-serial.f90 +++ b/flang/test/Lower/OpenACC/acc-serial.f90 @@ -2,6 +2,13 @@ ! RUN: bbc -fopenacc -emit-fir %s -o - | FileCheck %s +! CHECK-LABEL: acc.private.recipe @privatization_10x10xf32 : !fir.ref> init { +! CHECK: ^bb0(%{{.*}}: !fir.ref>): +! CHECK: acc.yield %{{.*}} : !fir.ref> +! CHECK: } + +! CHECK-LABEL: func.func @_QPacc_serial() + subroutine acc_serial integer :: i, j @@ -234,7 +241,7 @@ !$acc serial private(a) firstprivate(b) private(c) !$acc end serial -! CHECK: acc.serial firstprivate(%[[B]] : !fir.ref>) private(%[[A]], %[[C]] : !fir.ref>, !fir.ref>) { +! CHECK: acc.serial firstprivate(%[[B]] : !fir.ref>) private(@privatization_10x10xf32 -> %[[A]] : !fir.ref>, @privatization_10x10xf32 -> %[[C]] : !fir.ref>) { ! CHECK: acc.yield ! CHECK-NEXT: }{{$}} diff --git a/flang/test/Lower/array.f90 b/flang/test/Lower/array.f90 --- a/flang/test/Lower/array.f90 +++ b/flang/test/Lower/array.f90 @@ -102,33 +102,25 @@ integer, dimension(10) :: a0 real, dimension(2,3) :: a1 integer, dimension(3,4) :: a2 + integer, dimension(2,3,4) :: a3 a0 = (/1, 2, 3, 3, 3, 3, 3, 3, 3, 3/) a1 = reshape((/3.5, 3.5, 3.5, 3.5, 3.5, 3.5/), shape(a1)) a2 = reshape((/1, 3, 3, 5, 3, 3, 3, 3, 9, 9, 9, 8/), shape(a2)) + a3 = reshape((/1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12/), shape(a3)) end subroutine range ! a0 array constructor ! CHECK: fir.global internal @_QQro.10xi4.{{.*}}(dense<[1, 2, 3, 3, 3, 3, 3, 3, 3, 3]> : tensor<10xi32>) constant : !fir.array<10xi32> ! a1 array constructor -! CHECK: fir.global internal @_QQro.2x3xr4.{{.*}} constant : !fir.array<2x3xf32> { - ! 
CHECK-DAG: %cst = arith.constant {{.*}} : f32 - ! CHECK: %{{.*}} = fir.insert_on_range %{{[0-9]+}}, %cst from (0, 0) to (1, 2) : +! CHECK: fir.global internal @_QQro.2x3xr4.{{.*}}(dense<3.500000e+00> : tensor<3x2xf32>) constant : !fir.array<2x3xf32> ! a2 array constructor -! CHECK: fir.global internal @_QQro.3x4xi4.{{.*}} constant : !fir.array<3x4xi32> { - ! CHECK-DAG: %[[c1_i32:.*]] = arith.constant 1 : i32 - ! CHECK-DAG: %[[c3_i32:.*]] = arith.constant 3 : i32 - ! CHECK-DAG: %[[c5_i32:.*]] = arith.constant 5 : i32 - ! CHECK-DAG: %[[c8_i32:.*]] = arith.constant 8 : i32 - ! CHECK-DAG: %[[c9_i32:.*]] = arith.constant 9 : i32 - ! CHECK: %[[r1:.*]] = fir.insert_value %{{.*}}, %{{.*}}, [0 : index, 0 : index] : - ! CHECK: %[[r2:.*]] = fir.insert_on_range %[[r1]], %[[c3_i32]] from (1, 0) to (2, 0) : - ! CHECK: %[[r3:.*]] = fir.insert_value %[[r2]], %{{.*}}, [0 : index, 1 : index] : - ! CHECK: %[[r4:.*]] = fir.insert_on_range %[[r3]], %[[c3_i32]] from (1, 1) to (1, 2) : - ! CHECK: %[[r5:.*]] = fir.insert_on_range %[[r4]], %[[c9_i32]] from (2, 2) to (1, 3) : - ! CHECK: %[[r6:.*]] = fir.insert_value %[[r5]], %{{.*}}, [2 : index, 3 : index] : +! CHECK: fir.global internal @_QQro.3x4xi4.{{.*}}(dense<{{\[\[1, 3, 3], \[5, 3, 3], \[3, 3, 9], \[9, 9, 8]]}}> : tensor<4x3xi32>) constant : !fir.array<3x4xi32> + +! a3 array constructor +! CHECK: fir.global internal @_QQro.2x3x4xi4.{{.*}}(dense<{{\[\[\[1, 1], \[2, 2], \[3, 3]], \[\[4, 4], \[5, 5], \[6, 6]], \[\[7, 7], \[8, 8], \[9, 9]], \[\[10, 10], \[11, 11], \[12, 12]]]}}> : tensor<4x3x2xi32>) constant : !fir.array<2x3x4xi32> ! CHECK-LABEL rangeGlobal subroutine rangeGlobal() @@ -137,6 +129,15 @@ end subroutine rangeGlobal +! CHECK-LABEL hugeGlobal +subroutine hugeGlobal() + integer, parameter :: D = 500 + integer, dimension(D, D) :: a + +! 
CHECK: fir.global internal @_QQro.500x500xi4.{{.*}}(dense<{{.*}}> : tensor<500x500xi32>) constant : !fir.array<500x500xi32> + a = reshape((/(i, i = 1, D * D)/), shape(a)) +end subroutine hugeGlobal + block data real(selected_real_kind(6)) :: x(5,5) common /block/ x diff --git a/flang/test/Lower/dense-array-any-rank.f90 b/flang/test/Lower/dense-array-any-rank.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Lower/dense-array-any-rank.f90 @@ -0,0 +1,25 @@ +! RUN: bbc -emit-fir -o - %s | FileCheck --check-prefixes="CHECK-FIR" %s +! RUN: %flang_fc1 -emit-llvm -o - %s | FileCheck --check-prefixes="CHECK-LLVMIR" %s + +! CHECK-LABEL: test +subroutine test() + integer, dimension(10) :: a1 + integer, dimension(3,4) :: a2 + integer, dimension(2,3,4) :: a3 + + a1 = (/1, 2, 3, 4, 5, 6, 7, 8, 9, 10/) + a2 = reshape((/11, 12, 13, 21, 22, 23, 31, 32, 33, 41, 42, 43/), shape(a2)) + a3 = reshape((/111, 112, 121, 122, 131, 132, 211, 212, 221, 222, 231, 232, 311, 312, 321, 322, 331, 332, 411, 412, 421, 422, 431, 432/), shape(a3)) +end subroutine + +! a1 array constructor +! CHECK-FIR: fir.global internal @_QQro.10xi4.{{.*}}(dense<[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]> : tensor<10xi32>) constant : !fir.array<10xi32> +! CHECK-LLVMIR: @_QQro.10xi4.0 = internal constant [10 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10] + +! a2 array constructor +! CHECK-FIR: fir.global internal @_QQro.3x4xi4.{{.*}}(dense<{{\[\[11, 12, 13], \[21, 22, 23], \[31, 32, 33], \[41, 42, 43]]}}> : tensor<4x3xi32>) constant : !fir.array<3x4xi32> +! CHECK-LLVMIR: @_QQro.3x4xi4.1 = internal constant [4 x [3 x i32]] {{\[\[3 x i32] \[i32 11, i32 12, i32 13], \[3 x i32] \[i32 21, i32 22, i32 23], \[3 x i32] \[i32 31, i32 32, i32 33], \[3 x i32] \[i32 41, i32 42, i32 43]]}} + +! a3 array constructor +! 
CHECK-FIR: fir.global internal @_QQro.2x3x4xi4.{{.*}}(dense<{{\[\[\[111, 112], \[121, 122], \[131, 132]], \[\[211, 212], \[221, 222], \[231, 232]], \[\[311, 312], \[321, 322], \[331, 332]], \[\[411, 412], \[421, 422], \[431, 432]]]}}> : tensor<4x3x2xi32>) constant : !fir.array<2x3x4xi32> +! CHECK-LLVMIR: @_QQro.2x3x4xi4.2 = internal constant [4 x [3 x [2 x i32]]] {{\[\[3 x \[2 x i32]] \[\[2 x i32] \[i32 111, i32 112], \[2 x i32] \[i32 121, i32 122], \[2 x i32] \[i32 131, i32 132]], \[3 x \[2 x i32]] \[\[2 x i32] \[i32 211, i32 212], \[2 x i32] \[i32 221, i32 222], \[2 x i32] \[i32 231, i32 232]], \[3 x \[2 x i32]] \[\[2 x i32] \[i32 311, i32 312], \[2 x i32] \[i32 321, i32 322], \[2 x i32] \[i32 331, i32 332]], \[3 x \[2 x i32]] \[\[2 x i32] \[i32 411, i32 412], \[2 x i32] \[i32 421, i32 422], \[2 x i32] \[i32 431, i32 432]]]}} diff --git a/flang/test/Parser/at-process.f b/flang/test/Parser/at-process.f new file mode 100644 --- /dev/null +++ b/flang/test/Parser/at-process.f @@ -0,0 +1,20 @@ +! RUN: %flang_fc1 -fsyntax-only %s 2>&1 | FileCheck %s + +! Test ignoring @PROCESS directive in fixed source form + +@process opt(3) +@process opt(0) +@process +@processopt(3) + subroutine f() +c@process + end + +!CHECK: Character in fixed-form label field must be a digit +@ + +!CHECK: Character in fixed-form label field must be a digit +@proce + +!CHECK: Character in fixed-form label field must be a digit +@precoss diff --git a/flang/test/Parser/at-process.f90 b/flang/test/Parser/at-process.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Parser/at-process.f90 @@ -0,0 +1,23 @@ +! RUN: not %flang_fc1 -fsyntax-only %s 2>&1 | FileCheck %s + +! Test ignoring @PROCESS directive in free source form + +@process opt(3) +@process opt(0) + @process strict +@processopt(3) +subroutine f() +print *, "@process" + ! 
@process +end subroutine f + +!CHECK: error: expected '(' +@p + +!CHECK: error: expected '(' +@proce + +!CHECK: error: expected '(' +@precoss +end + diff --git a/flang/unittests/Optimizer/FIRTypesTest.cpp b/flang/unittests/Optimizer/FIRTypesTest.cpp --- a/flang/unittests/Optimizer/FIRTypesTest.cpp +++ b/flang/unittests/Optimizer/FIRTypesTest.cpp @@ -8,13 +8,19 @@ #include "gtest/gtest.h" #include "flang/Optimizer/Dialect/FIRType.h" +#include "flang/Optimizer/Dialect/Support/KindMapping.h" #include "flang/Optimizer/Support/InitFIR.h" struct FIRTypesTest : public testing::Test { public: - void SetUp() { fir::support::loadDialects(context); } - + void SetUp() { + fir::support::loadDialects(context); + kindMap = new fir::KindMapping(&context, kindMapInit, "r42a10c14d28i40l41"); + } mlir::MLIRContext context; + fir::KindMapping *kindMap{}; + std::string kindMapInit = + "i10:80,l3:24,a1:8,r54:Double,r62:X86_FP80,r11:PPC_FP128"; }; // Test fir::isPolymorphicType from flang/Optimizer/Dialect/FIRType.h. 
@@ -253,3 +259,22 @@ EXPECT_EQ(ptrArrNone, fir::updateTypeForUnlimitedPolymorphic(ptrArrTy)); } } + +TEST_F(FIRTypesTest, getTypeAsString) { + EXPECT_EQ("i32", + fir::getTypeAsString(mlir::IntegerType::get(&context, 32), *kindMap)); + EXPECT_EQ( + "f64", fir::getTypeAsString(mlir::FloatType::getF64(&context), *kindMap)); + EXPECT_EQ( + "l8", fir::getTypeAsString(fir::LogicalType::get(&context, 1), *kindMap)); + EXPECT_EQ("z32", + fir::getTypeAsString( + mlir::ComplexType::get(mlir::FloatType::getF32(&context)), *kindMap)); + EXPECT_EQ("c8", + fir::getTypeAsString(fir::CharacterType::get(&context, 1, 1), *kindMap)); + EXPECT_EQ("c8x10", + fir::getTypeAsString(fir::CharacterType::get(&context, 1, 10), *kindMap)); + mlir::Type ty = mlir::IntegerType::get(&context, 64); + mlir::Type arrTy = fir::SequenceType::get({10, 20}, ty); + EXPECT_EQ("10x20xi64", fir::getTypeAsString(arrTy, *kindMap)); +} diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt --- a/libclc/CMakeLists.txt +++ b/libclc/CMakeLists.txt @@ -115,6 +115,7 @@ BitReader BitWriter Core + Support ) add_llvm_executable( prepare_builtins utils/prepare-builtins.cpp ) target_compile_definitions( prepare_builtins PRIVATE ${LLVM_VERSION_DEFINE} ) diff --git a/libcxx/docs/Status/Cxx20.rst b/libcxx/docs/Status/Cxx20.rst --- a/libcxx/docs/Status/Cxx20.rst +++ b/libcxx/docs/Status/Cxx20.rst @@ -50,6 +50,7 @@ .. [#note-P0883.2] P0883: ``ATOMIC_FLAG_INIT`` was marked deprecated in version 14.0, but was undeprecated with the implementation of LWG3659 in version 15.0. .. [#note-P2231] P2231: Optional is complete. The changes to variant haven't been implemented yet. .. [#note-P0408] P0408: Only `view()` members implemented. + .. [#note-P0660] P0660: Section 32.3 Stop Tokens is complete. ``jthread`` hasn't been implemented yet. .. 
_issues-status-cxx20: diff --git a/libcxx/docs/Status/Cxx20Issues.csv b/libcxx/docs/Status/Cxx20Issues.csv --- a/libcxx/docs/Status/Cxx20Issues.csv +++ b/libcxx/docs/Status/Cxx20Issues.csv @@ -210,7 +210,7 @@ "`3250 `__","``std::format``\ : ``#``\ (alternate form) for NaN and inf","Prague","|Complete|","14.0","|format|" "`3251 `__","Are ``std::format``\ alignment specifiers applied to string arguments?","Prague","|Complete|","14.0","|format|" "`3252 `__","Parse locale's aware modifiers for commands are not consistent with POSIX spec","Prague","","","|chrono|" -"`3254 `__","Strike ``stop_token``\ 's ``operator!=``\ ","Prague","","" +"`3254 `__","Strike ``stop_token``\ 's ``operator!=``\ ","Prague","|Complete|","17.0" "`3255 `__","``span``\ 's ``array``\ constructor is too strict","Prague","|Complete|","" "`3260 `__","``year_month*``\ arithmetic rejects durations convertible to years","Prague","","","|chrono|" "`3262 `__","Formatting of negative durations is not specified","Prague","|Complete|","16.0","|chrono| |format|" diff --git a/libcxx/docs/Status/Cxx20Papers.csv b/libcxx/docs/Status/Cxx20Papers.csv --- a/libcxx/docs/Status/Cxx20Papers.csv +++ b/libcxx/docs/Status/Cxx20Papers.csv @@ -104,7 +104,7 @@ "`P0553R4 `__","LWG","Bit operations","Cologne","|Complete|","9.0" "`P0631R8 `__","LWG","Math Constants","Cologne","|Complete|","11.0" "`P0645R10 `__","LWG","Text Formatting","Cologne","|Complete| [#note-P0645]_","14.0" -"`P0660R10 `__","LWG","Stop Token and Joining Thread, Rev 10","Cologne","","" +"`P0660R10 `__","LWG","Stop Token and Joining Thread, Rev 10.","Cologne","|In Progress| [#note-P0660]_","" "`P0784R7 `__","CWG","More constexpr containers","Cologne","|Complete|","12.0" "`P0980R1 `__","LWG","Making std::string constexpr","Cologne","|Complete|","15.0" "`P1004R2 `__","LWG","Making std::vector constexpr","Cologne","|Complete|","15.0" diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt --- a/libcxx/include/CMakeLists.txt +++ 
b/libcxx/include/CMakeLists.txt @@ -632,6 +632,10 @@ __stop_token/atomic_unique_lock.h __stop_token/intrusive_list_view.h __stop_token/intrusive_shared_ptr.h + __stop_token/stop_callback.h + __stop_token/stop_source.h + __stop_token/stop_state.h + __stop_token/stop_token.h __string/char_traits.h __string/constexpr_c_functions.h __string/extern_template_lists.h @@ -947,6 +951,7 @@ stdint.h stdio.h stdlib.h + stop_token streambuf string string.h diff --git a/libcxx/include/__stop_token/stop_callback.h b/libcxx/include/__stop_token/stop_callback.h new file mode 100644 --- /dev/null +++ b/libcxx/include/__stop_token/stop_callback.h @@ -0,0 +1,98 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___STOP_TOKEN_STOP_CALLBACK_H +#define _LIBCPP___STOP_TOKEN_STOP_CALLBACK_H + +#include <__concepts/constructible.h> +#include <__concepts/destructible.h> +#include <__concepts/invocable.h> +#include <__config> +#include <__stop_token/intrusive_shared_ptr.h> +#include <__stop_token/stop_state.h> +#include <__stop_token/stop_token.h> +#include <__type_traits/is_nothrow_constructible.h> +#include <__utility/forward.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER >= 20 + +template +class stop_callback : private __stop_callback_base { + static_assert(invocable<_Callback>, + "Mandates: stop_callback is instantiated with an argument for the template parameter Callback that " + "satisfies invocable."); + static_assert(destructible<_Callback>, + "Mandates: stop_callback is instantiated with an argument for 
the template parameter Callback that " + "satisfies destructible."); + +public: + using callback_type = _Callback; + + template + requires constructible_from<_Callback, _Cb> + _LIBCPP_HIDE_FROM_ABI explicit stop_callback(const stop_token& __st, + _Cb&& __cb) noexcept(is_nothrow_constructible_v<_Callback, _Cb>) + : stop_callback(__private_tag{}, __st.__state_, std::forward<_Cb>(__cb)) {} + + template + requires constructible_from<_Callback, _Cb> + _LIBCPP_HIDE_FROM_ABI explicit stop_callback(stop_token&& __st, + _Cb&& __cb) noexcept(is_nothrow_constructible_v<_Callback, _Cb>) + : stop_callback(__private_tag{}, std::move(__st.__state_), std::forward<_Cb>(__cb)) {} + + _LIBCPP_HIDE_FROM_ABI ~stop_callback() { + if (__state_) { + __state_->__remove_callback(this); + } + } + + stop_callback(const stop_callback&) = delete; + stop_callback(stop_callback&&) = delete; + stop_callback& operator=(const stop_callback&) = delete; + stop_callback& operator=(stop_callback&&) = delete; + +private: + _LIBCPP_NO_UNIQUE_ADDRESS _Callback __callback_; + __intrusive_shared_ptr<__stop_state> __state_; + + friend __stop_callback_base; + + struct __private_tag {}; + + template + _LIBCPP_HIDE_FROM_ABI explicit stop_callback(__private_tag, _StatePtr&& __state, _Cb&& __cb) noexcept( + is_nothrow_constructible_v<_Callback, _Cb>) + : __stop_callback_base(+[](__stop_callback_base* __cb_base) noexcept { + // stop callback is supposed to only be called once + std::forward<_Callback>(static_cast(__cb_base)->__callback_)(); + }), + __callback_(std::forward<_Cb>(__cb)), + __state_() { + if (__state && __state->__add_callback(this)) { + // st.stop_requested() was false and this is successfully added to the linked list + __state_ = std::forward<_StatePtr>(__state); + } + } +}; + +template +stop_callback(stop_token, Callback) -> stop_callback; + +#endif // _LIBCPP_STD_VER >= 20 + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___STOP_TOKEN_STOP_CALLBACK_H diff --git 
a/libcxx/include/__stop_token/stop_source.h b/libcxx/include/__stop_token/stop_source.h new file mode 100644 --- /dev/null +++ b/libcxx/include/__stop_token/stop_source.h @@ -0,0 +1,91 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___STOP_TOKEN_STOP_SOURCE_H +#define _LIBCPP___STOP_TOKEN_STOP_SOURCE_H + +#include <__config> +#include <__stop_token/intrusive_shared_ptr.h> +#include <__stop_token/stop_state.h> +#include <__stop_token/stop_token.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER >= 20 + +struct nostopstate_t { + explicit nostopstate_t() = default; +}; + +inline constexpr nostopstate_t nostopstate{}; + +class stop_source { +public: + _LIBCPP_HIDE_FROM_ABI stop_source() : __state_(new __stop_state()) { __state_->__increment_stop_source_counter(); } + + _LIBCPP_HIDE_FROM_ABI explicit stop_source(nostopstate_t) noexcept : __state_(nullptr) {} + + _LIBCPP_HIDE_FROM_ABI stop_source(const stop_source& __other) noexcept : __state_(__other.__state_) { + if (__state_) { + __state_->__increment_stop_source_counter(); + } + } + + _LIBCPP_HIDE_FROM_ABI stop_source(stop_source&& __other) noexcept = default; + + _LIBCPP_HIDE_FROM_ABI stop_source& operator=(const stop_source& __other) noexcept { + // increment `__other` first so that we don't hit 0 in case of self-assignment + if (__other.__state_) { + __other.__state_->__increment_stop_source_counter(); + } + if (__state_) { + __state_->__decrement_stop_source_counter(); + } + __state_ = __other.__state_; + return *this; + } + + 
_LIBCPP_HIDE_FROM_ABI stop_source& operator=(stop_source&&) noexcept = default; + + _LIBCPP_HIDE_FROM_ABI ~stop_source() { + if (__state_) { + __state_->__decrement_stop_source_counter(); + } + } + + _LIBCPP_HIDE_FROM_ABI void swap(stop_source& __other) noexcept { __state_.swap(__other.__state_); } + + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI stop_token get_token() const noexcept { return stop_token(__state_); } + + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool stop_possible() const noexcept { return __state_ != nullptr; } + + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool stop_requested() const noexcept { + return __state_ != nullptr && __state_->__stop_requested(); + } + + _LIBCPP_HIDE_FROM_ABI bool request_stop() noexcept { return __state_ && __state_->__request_stop(); } + + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI friend bool operator==(const stop_source&, const stop_source&) noexcept = default; + + _LIBCPP_HIDE_FROM_ABI friend void swap(stop_source& __lhs, stop_source& __rhs) noexcept { __lhs.swap(__rhs); } + +private: + __intrusive_shared_ptr<__stop_state> __state_; +}; + +#endif // _LIBCPP_STD_VER >= 20 + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___STOP_TOKEN_STOP_SOURCE_H diff --git a/libcxx/include/__stop_token/stop_state.h b/libcxx/include/__stop_token/stop_state.h new file mode 100644 --- /dev/null +++ b/libcxx/include/__stop_token/stop_state.h @@ -0,0 +1,233 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___STOP_TOKEN_STOP_STATE_H +#define _LIBCPP___STOP_TOKEN_STOP_STATE_H + +#include <__config> +#include <__stop_token/atomic_unique_lock.h> +#include <__stop_token/intrusive_list_view.h> +#include +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER >= 20 + +struct __stop_callback_base : __intrusive_node_base<__stop_callback_base> { + using __callback_fn_t = void(__stop_callback_base*) noexcept; + _LIBCPP_HIDE_FROM_ABI explicit __stop_callback_base(__callback_fn_t* __callback_fn) : __callback_fn_(__callback_fn) {} + + _LIBCPP_HIDE_FROM_ABI void __invoke() noexcept { __callback_fn_(this); } + + __callback_fn_t* __callback_fn_; + std::atomic __completed_ = false; + bool* __destroyed_ = nullptr; +}; + +class __stop_state { + static constexpr uint32_t __stop_requested_bit = 1; + static constexpr uint32_t __callback_list_locked_bit = 1 << 1; + static constexpr uint32_t __stop_source_counter_shift = 2; + + // The "stop_source counter" is not used for lifetime reference counting. + // When the number of stop_source reaches 0, the remaining stop_tokens's + // stop_possible will return false. We need this counter to track this. 
+ // + // The "callback list locked" bit implements the atomic_unique_lock to + // guard the operations on the callback list + // + // 31 - 2 | 1 | 0 | + // stop_source counter | callback list locked | stop_requested | + std::atomic __state_ = 0; + + // Reference count for stop_token + stop_callback + stop_source + // When the counter reaches zero, the state is destroyed + // It is used by __intrusive_shared_ptr, but it is stored here for better layout + std::atomic __ref_count_ = 0; + + using __state_t = uint32_t; + using __callback_list_lock = __atomic_unique_lock<__state_t, __callback_list_locked_bit>; + using __callback_list = __intrusive_list_view<__stop_callback_base>; + + __callback_list __callback_list_; + std::thread::id __requesting_thread_; + +public: + _LIBCPP_HIDE_FROM_ABI __stop_state() noexcept = default; + + _LIBCPP_HIDE_FROM_ABI void __increment_stop_source_counter() noexcept { + _LIBCPP_ASSERT( + __state_.load(std::memory_order_relaxed) <= static_cast<__state_t>(~(1 << __stop_source_counter_shift)), + "stop_source's counter reaches the maximum. Incrementing the counter will overflow"); + __state_.fetch_add(1 << __stop_source_counter_shift, std::memory_order_relaxed); + } + + // We are not destroying the object after counter decrements to zero, nor do we have + // operations depend on the ordering of decrementing the counter. relaxed is enough. + _LIBCPP_HIDE_FROM_ABI void __decrement_stop_source_counter() noexcept { + _LIBCPP_ASSERT(__state_.load(std::memory_order_relaxed) >= static_cast<__state_t>(1 << __stop_source_counter_shift), + "stop_source's counter is 0. 
Decrementing the counter will underflow"); + __state_.fetch_sub(1 << __stop_source_counter_shift, std::memory_order_relaxed); + } + + _LIBCPP_HIDE_FROM_ABI bool __stop_requested() const noexcept { + // acquire because [thread.stoptoken.intro] A call to request_stop that returns true + // synchronizes with a call to stop_requested on an associated stop_token or stop_source + // object that returns true. + // request_stop's compare_exchange_weak has release which syncs with this acquire + return (__state_.load(std::memory_order_acquire) & __stop_requested_bit) != 0; + } + + _LIBCPP_HIDE_FROM_ABI bool __stop_possible_for_stop_token() const noexcept { + // [stoptoken.mem] false if "a stop request was not made and there are no associated stop_source objects" + // Todo: Can this be std::memory_order_relaxed as the standard does not say anything except not to introduce data + // race? + __state_t __curent_state = __state_.load(std::memory_order_acquire); + return ((__curent_state & __stop_requested_bit) != 0) || ((__curent_state >> __stop_source_counter_shift) != 0); + } + + _LIBCPP_HIDE_FROM_ABI bool __request_stop() noexcept { + auto __cb_list_lock = __try_lock_for_request_stop(); + if (!__cb_list_lock.__owns_lock()) { + return false; + } + __requesting_thread_ = std::this_thread::get_id(); + + while (!__callback_list_.__empty()) { + auto __cb = __callback_list_.__pop_front(); + + // allow other callbacks to be removed while invoking the current callback + __cb_list_lock.__unlock(); + + bool __destroyed = false; + __cb->__destroyed_ = &__destroyed; + + __cb->__invoke(); + + // __cb's invoke function could potentially delete itself. 
We need to check before accessing __cb's member + if (!__destroyed) { + // needs to set __destroyed_ pointer to nullptr, otherwise it points to a local variable + // which is to be destroyed at the end of the loop + __cb->__destroyed_ = nullptr; + + // [stopcallback.cons] If callback is concurrently executing on another thread, then the return + // from the invocation of callback strongly happens before ([intro.races]) callback is destroyed. + // this release syncs with the acquire in the remove_callback + __cb->__completed_.store(true, std::memory_order_release); + __cb->__completed_.notify_all(); + } + + __cb_list_lock.__lock(); + } + + return true; + } + + _LIBCPP_HIDE_FROM_ABI bool __add_callback(__stop_callback_base* __cb) noexcept { + // If it is already stop_requested. Do not try to request it again. + const auto __give_up_trying_to_lock_condition = [__cb](__state_t __state) { + if ((__state & __stop_requested_bit) != 0) { + // already stop requested, synchronously run the callback and no need to lock the list again + __cb->__invoke(); + return true; + } + // no stop source. no need to lock the list to add the callback as it can never be invoked + return (__state >> __stop_source_counter_shift) == 0; + }; + + __callback_list_lock __cb_list_lock(__state_, __give_up_trying_to_lock_condition); + + if (!__cb_list_lock.__owns_lock()) { + return false; + } + + __callback_list_.__push_front(__cb); + + return true; + // unlock here: [thread.stoptoken.intro] Registration of a callback synchronizes with the invocation of + // that callback. 
+ // Note: this release sync with the acquire in the request_stop' __try_lock_for_request_stop + } + + // called by the destructor of stop_callback + _LIBCPP_HIDE_FROM_ABI void __remove_callback(__stop_callback_base* __cb) noexcept { + __callback_list_lock __cb_list_lock(__state_); + + // under below condition, the request_stop call just popped __cb from the list and could execute it now + bool __potentially_executing_now = __cb->__prev_ == nullptr && !__callback_list_.__is_head(__cb); + + if (__potentially_executing_now) { + auto __requested_thread = __requesting_thread_; + __cb_list_lock.__unlock(); + + if (std::this_thread::get_id() != __requested_thread) { + // [stopcallback.cons] If callback is concurrently executing on another thread, then the return + // from the invocation of callback strongly happens before ([intro.races]) callback is destroyed. + __cb->__completed_.wait(false, std::memory_order_acquire); + } else { + // The destructor of stop_callback runs on the same thread of the thread that invokes the callback. + // The callback is potentially invoking its own destuctor. Set the flag to avoid accessing destroyed + // members on the invoking side + if (__cb->__destroyed_) { + *__cb->__destroyed_ = true; + } + } + } else { + __callback_list_.__remove(__cb); + } + } + +private: + _LIBCPP_HIDE_FROM_ABI __callback_list_lock __try_lock_for_request_stop() noexcept { + // If it is already stop_requested, do not try to request stop or lock the list again. + const auto __lock_fail_condition = [](__state_t __state) { return (__state & __stop_requested_bit) != 0; }; + + // set locked and requested bit at the same time + const auto __after_lock_state = [](__state_t __state) { + return __state | __callback_list_locked_bit | __stop_requested_bit; + }; + + // acq because [thread.stoptoken.intro] Registration of a callback synchronizes with the invocation of that + // callback. 
We are going to invoke the callback after getting the lock, acquire so that we can see the + // registration of a callback (and other writes that happens-before the add_callback) + // Note: the rel (unlock) in the add_callback syncs with this acq + // rel because [thread.stoptoken.intro] A call to request_stop that returns true synchronizes with a call + // to stop_requested on an associated stop_token or stop_source object that returns true. + // We need to make sure that all writes (including user code) before request_stop will be made visible + // to the threads that waiting for `stop_requested == true` + // Note: this rel syncs with the acq in `stop_requested` + const auto __locked_ordering = std::memory_order_acq_rel; + + return __callback_list_lock(__state_, __lock_fail_condition, __after_lock_state, __locked_ordering); + } + + template + friend struct __intrusive_shared_ptr_traits; +}; + +template +struct __intrusive_shared_ptr_traits; + +template <> +struct __intrusive_shared_ptr_traits<__stop_state> { + _LIBCPP_HIDE_FROM_ABI static std::atomic& __get_atomic_ref_count(__stop_state& __state) { + return __state.__ref_count_; + } +}; + +#endif // _LIBCPP_STD_VER >= 20 + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___STOP_TOKEN_STOP_STATE_H diff --git a/libcxx/include/__stop_token/stop_token.h b/libcxx/include/__stop_token/stop_token.h new file mode 100644 --- /dev/null +++ b/libcxx/include/__stop_token/stop_token.h @@ -0,0 +1,63 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___STOP_TOKEN_STOP_TOKEN_H +#define _LIBCPP___STOP_TOKEN_STOP_TOKEN_H + +#include <__config> +#include <__stop_token/intrusive_shared_ptr.h> +#include <__stop_token/stop_state.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER >= 20 + +class stop_token { +public: + _LIBCPP_HIDE_FROM_ABI stop_token() noexcept = default; + + _LIBCPP_HIDE_FROM_ABI stop_token(const stop_token&) noexcept = default; + _LIBCPP_HIDE_FROM_ABI stop_token(stop_token&&) noexcept = default; + _LIBCPP_HIDE_FROM_ABI stop_token& operator=(const stop_token&) noexcept = default; + _LIBCPP_HIDE_FROM_ABI stop_token& operator=(stop_token&&) noexcept = default; + _LIBCPP_HIDE_FROM_ABI ~stop_token() = default; + + _LIBCPP_HIDE_FROM_ABI void swap(stop_token& __other) noexcept { __state_.swap(__other.__state_); } + + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool stop_requested() const noexcept { + return __state_ != nullptr && __state_->__stop_requested(); + } + + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool stop_possible() const noexcept { + return __state_ != nullptr && __state_->__stop_possible_for_stop_token(); + } + + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI friend bool operator==(const stop_token&, const stop_token&) noexcept = default; + + _LIBCPP_HIDE_FROM_ABI friend void swap(stop_token& __lhs, stop_token& __rhs) noexcept { __lhs.swap(__rhs); } + +private: + __intrusive_shared_ptr<__stop_state> __state_; + + friend class stop_source; + template + friend class stop_callback; + + _LIBCPP_HIDE_FROM_ABI explicit stop_token(const __intrusive_shared_ptr<__stop_state>& __state) : __state_(__state) {} +}; + +#endif // _LIBCPP_STD_VER >= 20 + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___STOP_TOKEN_STOP_TOKEN_H diff --git a/libcxx/include/module.modulemap.in 
b/libcxx/include/module.modulemap.in --- a/libcxx/include/module.modulemap.in +++ b/libcxx/include/module.modulemap.in @@ -1448,11 +1448,17 @@ export * } module stop_token { + @requires_LIBCXX_ENABLE_THREADS@ + header "stop_token" export * module __stop_token { module atomic_unique_lock { private header "__stop_token/atomic_unique_lock.h" } module intrusive_list_view { private header "__stop_token/intrusive_list_view.h" } module intrusive_shared_ptr { private header "__stop_token/intrusive_shared_ptr.h" } + module stop_callback { private header "__stop_token/stop_callback.h" } + module stop_source { private header "__stop_token/stop_source.h" } + module stop_state { private header "__stop_token/stop_state.h" } + module stop_token { private header "__stop_token/stop_token.h" } } } module streambuf { diff --git a/libcxx/include/stop_token b/libcxx/include/stop_token new file mode 100644 --- /dev/null +++ b/libcxx/include/stop_token @@ -0,0 +1,49 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP_STOP_TOKEN +#define _LIBCPP_STOP_TOKEN + +/* + +namespace std { + // [stoptoken], class stop_token + class stop_token; + + // [stopsource], class stop_source + class stop_source; + + // no-shared-stop-state indicator + struct nostopstate_t { + explicit nostopstate_t() = default; + }; + inline constexpr nostopstate_t nostopstate{}; + + // [stopcallback], class template stop_callback + template + class stop_callback; + +*/ + +#include <__assert> // all public C++ headers provide the assertion handler +#include <__config> +#include <__stop_token/stop_callback.h> +#include <__stop_token/stop_source.h> +#include <__stop_token/stop_token.h> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#ifdef _LIBCPP_HAS_NO_THREADS +# error " is not supported since libc++ has been configured without support for threads." 
+#endif + +#endif // _LIBCPP_STOP_TOKEN diff --git a/libcxx/test/libcxx/assertions/headers_declare_verbose_abort.sh.cpp b/libcxx/test/libcxx/assertions/headers_declare_verbose_abort.sh.cpp --- a/libcxx/test/libcxx/assertions/headers_declare_verbose_abort.sh.cpp +++ b/libcxx/test/libcxx/assertions/headers_declare_verbose_abort.sh.cpp @@ -555,199 +555,205 @@ #endif // RUN: %{build} -DTEST_101 -#if defined(TEST_101) && !defined(_LIBCPP_HAS_NO_LOCALIZATION) -# include +#if defined(TEST_101) && !defined(_LIBCPP_HAS_NO_THREADS) && defined(_LIBCPP_AVAILABILITY_SYNC) +# include using HandlerType = decltype(std::__libcpp_verbose_abort); #endif // RUN: %{build} -DTEST_102 -#if defined(TEST_102) -# include +#if defined(TEST_102) && !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# include using HandlerType = decltype(std::__libcpp_verbose_abort); #endif -// RUN: %{build} -DTEST_104 -#if defined(TEST_104) -# include +// RUN: %{build} -DTEST_103 +#if defined(TEST_103) +# include using HandlerType = decltype(std::__libcpp_verbose_abort); #endif // RUN: %{build} -DTEST_105 -#if defined(TEST_105) && !defined(_LIBCPP_HAS_NO_LOCALIZATION) -# include +#if defined(TEST_105) +# include using HandlerType = decltype(std::__libcpp_verbose_abort); #endif // RUN: %{build} -DTEST_106 -#if defined(TEST_106) -# include +#if defined(TEST_106) && !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# include using HandlerType = decltype(std::__libcpp_verbose_abort); #endif -// RUN: %{build} -DTEST_108 -#if defined(TEST_108) && !defined(_LIBCPP_HAS_NO_THREADS) -# include +// RUN: %{build} -DTEST_107 +#if defined(TEST_107) +# include using HandlerType = decltype(std::__libcpp_verbose_abort); #endif // RUN: %{build} -DTEST_109 -#if defined(TEST_109) -# include +#if defined(TEST_109) && !defined(_LIBCPP_HAS_NO_THREADS) +# include using HandlerType = decltype(std::__libcpp_verbose_abort); #endif // RUN: %{build} -DTEST_110 #if defined(TEST_110) -# include +# include using HandlerType = 
decltype(std::__libcpp_verbose_abort); #endif // RUN: %{build} -DTEST_111 #if defined(TEST_111) -# include +# include using HandlerType = decltype(std::__libcpp_verbose_abort); #endif // RUN: %{build} -DTEST_112 #if defined(TEST_112) -# include +# include using HandlerType = decltype(std::__libcpp_verbose_abort); #endif -// RUN: %{build} -DTEST_114 -#if defined(TEST_114) -# include +// RUN: %{build} -DTEST_113 +#if defined(TEST_113) +# include using HandlerType = decltype(std::__libcpp_verbose_abort); #endif // RUN: %{build} -DTEST_115 #if defined(TEST_115) -# include +# include using HandlerType = decltype(std::__libcpp_verbose_abort); #endif // RUN: %{build} -DTEST_116 #if defined(TEST_116) -# include +# include using HandlerType = decltype(std::__libcpp_verbose_abort); #endif // RUN: %{build} -DTEST_117 #if defined(TEST_117) -# include +# include using HandlerType = decltype(std::__libcpp_verbose_abort); #endif // RUN: %{build} -DTEST_118 #if defined(TEST_118) -# include +# include using HandlerType = decltype(std::__libcpp_verbose_abort); #endif // RUN: %{build} -DTEST_119 #if defined(TEST_119) -# include +# include using HandlerType = decltype(std::__libcpp_verbose_abort); #endif // RUN: %{build} -DTEST_120 #if defined(TEST_120) -# include +# include using HandlerType = decltype(std::__libcpp_verbose_abort); #endif -// RUN: %{build} -DTEST_123 -#if defined(TEST_123) && __cplusplus >= 201103L -# include +// RUN: %{build} -DTEST_121 +#if defined(TEST_121) +# include using HandlerType = decltype(std::__libcpp_verbose_abort); #endif // RUN: %{build} -DTEST_124 #if defined(TEST_124) && __cplusplus >= 201103L -# include +# include using HandlerType = decltype(std::__libcpp_verbose_abort); #endif // RUN: %{build} -DTEST_125 #if defined(TEST_125) && __cplusplus >= 201103L -# include +# include using HandlerType = decltype(std::__libcpp_verbose_abort); #endif // RUN: %{build} -DTEST_126 #if defined(TEST_126) && __cplusplus >= 201103L -# include +# include using 
HandlerType = decltype(std::__libcpp_verbose_abort); #endif // RUN: %{build} -DTEST_127 #if defined(TEST_127) && __cplusplus >= 201103L -# include +# include using HandlerType = decltype(std::__libcpp_verbose_abort); #endif // RUN: %{build} -DTEST_128 #if defined(TEST_128) && __cplusplus >= 201103L -# include +# include using HandlerType = decltype(std::__libcpp_verbose_abort); #endif // RUN: %{build} -DTEST_129 #if defined(TEST_129) && __cplusplus >= 201103L -# include +# include using HandlerType = decltype(std::__libcpp_verbose_abort); #endif // RUN: %{build} -DTEST_130 -#if defined(TEST_130) && !defined(_LIBCPP_HAS_NO_LOCALIZATION) && __cplusplus >= 201103L -# include +#if defined(TEST_130) && __cplusplus >= 201103L +# include using HandlerType = decltype(std::__libcpp_verbose_abort); #endif // RUN: %{build} -DTEST_131 -#if defined(TEST_131) && __cplusplus >= 201103L -# include +#if defined(TEST_131) && !defined(_LIBCPP_HAS_NO_LOCALIZATION) && __cplusplus >= 201103L +# include using HandlerType = decltype(std::__libcpp_verbose_abort); #endif // RUN: %{build} -DTEST_132 #if defined(TEST_132) && __cplusplus >= 201103L -# include +# include using HandlerType = decltype(std::__libcpp_verbose_abort); #endif // RUN: %{build} -DTEST_133 #if defined(TEST_133) && __cplusplus >= 201103L -# include +# include using HandlerType = decltype(std::__libcpp_verbose_abort); #endif // RUN: %{build} -DTEST_134 #if defined(TEST_134) && __cplusplus >= 201103L -# include +# include using HandlerType = decltype(std::__libcpp_verbose_abort); #endif // RUN: %{build} -DTEST_135 #if defined(TEST_135) && __cplusplus >= 201103L -# include +# include using HandlerType = decltype(std::__libcpp_verbose_abort); #endif // RUN: %{build} -DTEST_136 #if defined(TEST_136) && __cplusplus >= 201103L -# include +# include using HandlerType = decltype(std::__libcpp_verbose_abort); #endif // RUN: %{build} -DTEST_137 #if defined(TEST_137) && __cplusplus >= 201103L -# include +# include using HandlerType = 
decltype(std::__libcpp_verbose_abort); #endif // RUN: %{build} -DTEST_138 #if defined(TEST_138) && __cplusplus >= 201103L +# include + using HandlerType = decltype(std::__libcpp_verbose_abort); +#endif + +// RUN: %{build} -DTEST_139 +#if defined(TEST_139) && __cplusplus >= 201103L # include using HandlerType = decltype(std::__libcpp_verbose_abort); #endif diff --git a/libcxx/test/libcxx/clang_tidy.sh.cpp b/libcxx/test/libcxx/clang_tidy.sh.cpp --- a/libcxx/test/libcxx/clang_tidy.sh.cpp +++ b/libcxx/test/libcxx/clang_tidy.sh.cpp @@ -178,6 +178,9 @@ #include #include #include +#if !defined(_LIBCPP_HAS_NO_THREADS) && defined(_LIBCPP_AVAILABILITY_SYNC) +# include +#endif #if !defined(_LIBCPP_HAS_NO_LOCALIZATION) # include #endif diff --git a/libcxx/test/libcxx/double_include.sh.cpp b/libcxx/test/libcxx/double_include.sh.cpp --- a/libcxx/test/libcxx/double_include.sh.cpp +++ b/libcxx/test/libcxx/double_include.sh.cpp @@ -176,6 +176,9 @@ #include #include #include +#if !defined(_LIBCPP_HAS_NO_THREADS) && defined(_LIBCPP_AVAILABILITY_SYNC) +# include +#endif #if !defined(_LIBCPP_HAS_NO_LOCALIZATION) # include #endif diff --git a/libcxx/test/libcxx/min_max_macros.compile.pass.cpp b/libcxx/test/libcxx/min_max_macros.compile.pass.cpp --- a/libcxx/test/libcxx/min_max_macros.compile.pass.cpp +++ b/libcxx/test/libcxx/min_max_macros.compile.pass.cpp @@ -277,6 +277,10 @@ TEST_MACROS(); #include TEST_MACROS(); +#if !defined(_LIBCPP_HAS_NO_THREADS) && defined(_LIBCPP_AVAILABILITY_SYNC) +# include +TEST_MACROS(); +#endif #if !defined(_LIBCPP_HAS_NO_LOCALIZATION) # include TEST_MACROS(); diff --git a/libcxx/test/libcxx/modules_include.sh.cpp b/libcxx/test/libcxx/modules_include.sh.cpp --- a/libcxx/test/libcxx/modules_include.sh.cpp +++ b/libcxx/test/libcxx/modules_include.sh.cpp @@ -644,232 +644,237 @@ // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_101 &' >> %t.sh // RUN: echo 'TEST_101=$!' 
>> %t.sh // RUN: echo "wait $TEST_85" >> %t.sh -#if defined(TEST_101) && !defined(_LIBCPP_HAS_NO_LOCALIZATION) -#include +#if defined(TEST_101) && !defined(_LIBCPP_HAS_NO_THREADS) && defined(_LIBCPP_AVAILABILITY_SYNC) +#include #endif // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_102 &' >> %t.sh // RUN: echo 'TEST_102=$!' >> %t.sh // RUN: echo "wait $TEST_86" >> %t.sh -#if defined(TEST_102) -#include +#if defined(TEST_102) && !defined(_LIBCPP_HAS_NO_LOCALIZATION) +#include #endif // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_103 &' >> %t.sh // RUN: echo 'TEST_103=$!' >> %t.sh // RUN: echo "wait $TEST_87" >> %t.sh #if defined(TEST_103) -#include +#include #endif // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_104 &' >> %t.sh // RUN: echo 'TEST_104=$!' >> %t.sh // RUN: echo "wait $TEST_88" >> %t.sh #if defined(TEST_104) -#include +#include #endif // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_105 &' >> %t.sh // RUN: echo 'TEST_105=$!' >> %t.sh // RUN: echo "wait $TEST_89" >> %t.sh -#if defined(TEST_105) && !defined(_LIBCPP_HAS_NO_LOCALIZATION) -#include +#if defined(TEST_105) +#include #endif // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_106 &' >> %t.sh // RUN: echo 'TEST_106=$!' >> %t.sh // RUN: echo "wait $TEST_90" >> %t.sh -#if defined(TEST_106) -#include +#if defined(TEST_106) && !defined(_LIBCPP_HAS_NO_LOCALIZATION) +#include #endif // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_107 &' >> %t.sh // RUN: echo 'TEST_107=$!' 
>> %t.sh // RUN: echo "wait $TEST_91" >> %t.sh #if defined(TEST_107) -#include +#include #endif // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_108 &' >> %t.sh // RUN: echo 'TEST_108=$!' >> %t.sh // RUN: echo "wait $TEST_92" >> %t.sh -#if defined(TEST_108) && !defined(_LIBCPP_HAS_NO_THREADS) -#include +#if defined(TEST_108) +#include #endif // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_109 &' >> %t.sh // RUN: echo 'TEST_109=$!' >> %t.sh // RUN: echo "wait $TEST_93" >> %t.sh -#if defined(TEST_109) -#include +#if defined(TEST_109) && !defined(_LIBCPP_HAS_NO_THREADS) +#include #endif // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_110 &' >> %t.sh // RUN: echo 'TEST_110=$!' >> %t.sh // RUN: echo "wait $TEST_94" >> %t.sh #if defined(TEST_110) -#include +#include #endif // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_111 &' >> %t.sh // RUN: echo 'TEST_111=$!' >> %t.sh // RUN: echo "wait $TEST_95" >> %t.sh #if defined(TEST_111) -#include +#include #endif // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_112 &' >> %t.sh // RUN: echo 'TEST_112=$!' >> %t.sh // RUN: echo "wait $TEST_96" >> %t.sh #if defined(TEST_112) -#include +#include #endif // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_113 &' >> %t.sh // RUN: echo 'TEST_113=$!' >> %t.sh // RUN: echo "wait $TEST_97" >> %t.sh #if defined(TEST_113) -#include +#include #endif // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_114 &' >> %t.sh // RUN: echo 'TEST_114=$!' 
>> %t.sh // RUN: echo "wait $TEST_98" >> %t.sh #if defined(TEST_114) -#include +#include #endif // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_115 &' >> %t.sh // RUN: echo 'TEST_115=$!' >> %t.sh // RUN: echo "wait $TEST_99" >> %t.sh #if defined(TEST_115) -#include +#include #endif // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_116 &' >> %t.sh // RUN: echo 'TEST_116=$!' >> %t.sh // RUN: echo "wait $TEST_100" >> %t.sh #if defined(TEST_116) -#include +#include #endif // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_117 &' >> %t.sh // RUN: echo 'TEST_117=$!' >> %t.sh // RUN: echo "wait $TEST_101" >> %t.sh #if defined(TEST_117) -#include +#include #endif // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_118 &' >> %t.sh // RUN: echo 'TEST_118=$!' >> %t.sh // RUN: echo "wait $TEST_102" >> %t.sh #if defined(TEST_118) -#include +#include #endif // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_119 &' >> %t.sh // RUN: echo 'TEST_119=$!' >> %t.sh // RUN: echo "wait $TEST_103" >> %t.sh #if defined(TEST_119) -#include +#include #endif // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_120 &' >> %t.sh // RUN: echo 'TEST_120=$!' >> %t.sh // RUN: echo "wait $TEST_104" >> %t.sh #if defined(TEST_120) -#include +#include #endif // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_121 &' >> %t.sh // RUN: echo 'TEST_121=$!' 
>> %t.sh // RUN: echo "wait $TEST_105" >> %t.sh -#if defined(TEST_121) && !defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS) -#include +#if defined(TEST_121) +#include #endif // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_122 &' >> %t.sh // RUN: echo 'TEST_122=$!' >> %t.sh // RUN: echo "wait $TEST_106" >> %t.sh #if defined(TEST_122) && !defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS) -#include +#include #endif // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_123 &' >> %t.sh // RUN: echo 'TEST_123=$!' >> %t.sh // RUN: echo "wait $TEST_107" >> %t.sh -#if defined(TEST_123) && __cplusplus >= 201103L -#include +#if defined(TEST_123) && !defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS) +#include #endif // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_124 &' >> %t.sh // RUN: echo 'TEST_124=$!' >> %t.sh // RUN: echo "wait $TEST_108" >> %t.sh #if defined(TEST_124) && __cplusplus >= 201103L -#include +#include #endif // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_125 &' >> %t.sh // RUN: echo 'TEST_125=$!' >> %t.sh // RUN: echo "wait $TEST_109" >> %t.sh #if defined(TEST_125) && __cplusplus >= 201103L -#include +#include #endif // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_126 &' >> %t.sh // RUN: echo 'TEST_126=$!' >> %t.sh // RUN: echo "wait $TEST_110" >> %t.sh #if defined(TEST_126) && __cplusplus >= 201103L -#include +#include #endif // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_127 &' >> %t.sh // RUN: echo 'TEST_127=$!' 
>> %t.sh // RUN: echo "wait $TEST_111" >> %t.sh #if defined(TEST_127) && __cplusplus >= 201103L -#include +#include #endif // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_128 &' >> %t.sh // RUN: echo 'TEST_128=$!' >> %t.sh // RUN: echo "wait $TEST_112" >> %t.sh #if defined(TEST_128) && __cplusplus >= 201103L -#include +#include #endif // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_129 &' >> %t.sh // RUN: echo 'TEST_129=$!' >> %t.sh // RUN: echo "wait $TEST_113" >> %t.sh #if defined(TEST_129) && __cplusplus >= 201103L -#include +#include #endif // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_130 &' >> %t.sh // RUN: echo 'TEST_130=$!' >> %t.sh // RUN: echo "wait $TEST_114" >> %t.sh -#if defined(TEST_130) && !defined(_LIBCPP_HAS_NO_LOCALIZATION) && __cplusplus >= 201103L -#include +#if defined(TEST_130) && __cplusplus >= 201103L +#include #endif // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_131 &' >> %t.sh // RUN: echo 'TEST_131=$!' >> %t.sh // RUN: echo "wait $TEST_115" >> %t.sh -#if defined(TEST_131) && __cplusplus >= 201103L -#include +#if defined(TEST_131) && !defined(_LIBCPP_HAS_NO_LOCALIZATION) && __cplusplus >= 201103L +#include #endif // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_132 &' >> %t.sh // RUN: echo 'TEST_132=$!' >> %t.sh // RUN: echo "wait $TEST_116" >> %t.sh #if defined(TEST_132) && __cplusplus >= 201103L -#include +#include #endif // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_133 &' >> %t.sh // RUN: echo 'TEST_133=$!' 
>> %t.sh // RUN: echo "wait $TEST_117" >> %t.sh #if defined(TEST_133) && __cplusplus >= 201103L -#include +#include #endif // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_134 &' >> %t.sh // RUN: echo 'TEST_134=$!' >> %t.sh // RUN: echo "wait $TEST_118" >> %t.sh #if defined(TEST_134) && __cplusplus >= 201103L -#include +#include #endif // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_135 &' >> %t.sh // RUN: echo 'TEST_135=$!' >> %t.sh // RUN: echo "wait $TEST_119" >> %t.sh #if defined(TEST_135) && __cplusplus >= 201103L -#include +#include #endif // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_136 &' >> %t.sh // RUN: echo 'TEST_136=$!' >> %t.sh // RUN: echo "wait $TEST_120" >> %t.sh #if defined(TEST_136) && __cplusplus >= 201103L -#include +#include #endif // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_137 &' >> %t.sh // RUN: echo 'TEST_137=$!' >> %t.sh // RUN: echo "wait $TEST_121" >> %t.sh #if defined(TEST_137) && __cplusplus >= 201103L -#include +#include #endif // RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_138 &' >> %t.sh // RUN: echo 'TEST_138=$!' >> %t.sh // RUN: echo "wait $TEST_122" >> %t.sh #if defined(TEST_138) && __cplusplus >= 201103L -#include +#include #endif +// RUN: echo '%{cxx} %s %{flags} %{compile_flags} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only -DTEST_139 &' >> %t.sh +// RUN: echo 'TEST_139=$!' 
>> %t.sh // RUN: echo "wait $TEST_123" >> %t.sh +#if defined(TEST_139) && __cplusplus >= 201103L +#include +#endif // RUN: echo "wait $TEST_124" >> %t.sh // RUN: echo "wait $TEST_125" >> %t.sh // RUN: echo "wait $TEST_126" >> %t.sh @@ -885,5 +890,6 @@ // RUN: echo "wait $TEST_136" >> %t.sh // RUN: echo "wait $TEST_137" >> %t.sh // RUN: echo "wait $TEST_138" >> %t.sh +// RUN: echo "wait $TEST_139" >> %t.sh // RUN: bash %t.sh // GENERATED-MARKER diff --git a/libcxx/test/libcxx/nasty_macros.compile.pass.cpp b/libcxx/test/libcxx/nasty_macros.compile.pass.cpp --- a/libcxx/test/libcxx/nasty_macros.compile.pass.cpp +++ b/libcxx/test/libcxx/nasty_macros.compile.pass.cpp @@ -301,6 +301,9 @@ #include #include #include +#if !defined(_LIBCPP_HAS_NO_THREADS) && defined(_LIBCPP_AVAILABILITY_SYNC) +# include +#endif #if !defined(_LIBCPP_HAS_NO_LOCALIZATION) # include #endif diff --git a/libcxx/test/libcxx/no_assert_include.compile.pass.cpp b/libcxx/test/libcxx/no_assert_include.compile.pass.cpp --- a/libcxx/test/libcxx/no_assert_include.compile.pass.cpp +++ b/libcxx/test/libcxx/no_assert_include.compile.pass.cpp @@ -173,6 +173,9 @@ #include #include #include +#if !defined(_LIBCPP_HAS_NO_THREADS) && defined(_LIBCPP_AVAILABILITY_SYNC) +# include +#endif #if !defined(_LIBCPP_HAS_NO_LOCALIZATION) # include #endif diff --git a/libcxx/test/libcxx/private_headers.verify.cpp b/libcxx/test/libcxx/private_headers.verify.cpp --- a/libcxx/test/libcxx/private_headers.verify.cpp +++ b/libcxx/test/libcxx/private_headers.verify.cpp @@ -629,6 +629,10 @@ #include <__stop_token/atomic_unique_lock.h> // expected-error@*:* {{use of private header from outside its module: '__stop_token/atomic_unique_lock.h'}} #include <__stop_token/intrusive_list_view.h> // expected-error@*:* {{use of private header from outside its module: '__stop_token/intrusive_list_view.h'}} #include <__stop_token/intrusive_shared_ptr.h> // expected-error@*:* {{use of private header from outside its module: 
'__stop_token/intrusive_shared_ptr.h'}} +#include <__stop_token/stop_callback.h> // expected-error@*:* {{use of private header from outside its module: '__stop_token/stop_callback.h'}} +#include <__stop_token/stop_source.h> // expected-error@*:* {{use of private header from outside its module: '__stop_token/stop_source.h'}} +#include <__stop_token/stop_state.h> // expected-error@*:* {{use of private header from outside its module: '__stop_token/stop_state.h'}} +#include <__stop_token/stop_token.h> // expected-error@*:* {{use of private header from outside its module: '__stop_token/stop_token.h'}} #include <__string/char_traits.h> // expected-error@*:* {{use of private header from outside its module: '__string/char_traits.h'}} #include <__string/constexpr_c_functions.h> // expected-error@*:* {{use of private header from outside its module: '__string/constexpr_c_functions.h'}} #include <__string/extern_template_lists.h> // expected-error@*:* {{use of private header from outside its module: '__string/extern_template_lists.h'}} diff --git a/libcxx/test/libcxx/transitive_includes.sh.cpp b/libcxx/test/libcxx/transitive_includes.sh.cpp --- a/libcxx/test/libcxx/transitive_includes.sh.cpp +++ b/libcxx/test/libcxx/transitive_includes.sh.cpp @@ -409,136 +409,140 @@ #if defined(TEST_97) #include #endif -// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_101 > /dev/null 2> %t/header.streambuf +// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_101 > /dev/null 2> %t/header.stop_token #if defined(TEST_101) -#include +#include #endif -// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_102 > /dev/null 2> %t/header.string +// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_102 > /dev/null 2> %t/header.streambuf #if defined(TEST_102) +#include +#endif +// RUN: %{cxx} %s 
%{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_103 > /dev/null 2> %t/header.string +#if defined(TEST_103) #include #endif -// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_104 > /dev/null 2> %t/header.string_view -#if defined(TEST_104) +// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_105 > /dev/null 2> %t/header.string_view +#if defined(TEST_105) #include #endif -// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_105 > /dev/null 2> %t/header.strstream -#if defined(TEST_105) +// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_106 > /dev/null 2> %t/header.strstream +#if defined(TEST_106) #include #endif -// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_106 > /dev/null 2> %t/header.system_error -#if defined(TEST_106) +// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_107 > /dev/null 2> %t/header.system_error +#if defined(TEST_107) #include #endif -// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_108 > /dev/null 2> %t/header.thread -#if defined(TEST_108) +// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_109 > /dev/null 2> %t/header.thread +#if defined(TEST_109) #include #endif -// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_109 > /dev/null 2> %t/header.tuple -#if defined(TEST_109) +// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_110 > /dev/null 2> %t/header.tuple +#if defined(TEST_110) #include #endif -// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes 
-fshow-skipped-includes --preprocess -DTEST_110 > /dev/null 2> %t/header.type_traits -#if defined(TEST_110) +// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_111 > /dev/null 2> %t/header.type_traits +#if defined(TEST_111) #include #endif -// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_111 > /dev/null 2> %t/header.typeindex -#if defined(TEST_111) +// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_112 > /dev/null 2> %t/header.typeindex +#if defined(TEST_112) #include #endif -// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_112 > /dev/null 2> %t/header.typeinfo -#if defined(TEST_112) +// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_113 > /dev/null 2> %t/header.typeinfo +#if defined(TEST_113) #include #endif -// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_114 > /dev/null 2> %t/header.unordered_map -#if defined(TEST_114) +// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_115 > /dev/null 2> %t/header.unordered_map +#if defined(TEST_115) #include #endif -// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_115 > /dev/null 2> %t/header.unordered_set -#if defined(TEST_115) +// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_116 > /dev/null 2> %t/header.unordered_set +#if defined(TEST_116) #include #endif -// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_116 > /dev/null 2> %t/header.utility -#if defined(TEST_116) +// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_117 > 
/dev/null 2> %t/header.utility +#if defined(TEST_117) #include #endif -// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_117 > /dev/null 2> %t/header.valarray -#if defined(TEST_117) +// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_118 > /dev/null 2> %t/header.valarray +#if defined(TEST_118) #include #endif -// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_118 > /dev/null 2> %t/header.variant -#if defined(TEST_118) +// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_119 > /dev/null 2> %t/header.variant +#if defined(TEST_119) #include #endif -// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_119 > /dev/null 2> %t/header.vector -#if defined(TEST_119) +// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_120 > /dev/null 2> %t/header.vector +#if defined(TEST_120) #include #endif -// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_120 > /dev/null 2> %t/header.version -#if defined(TEST_120) +// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_121 > /dev/null 2> %t/header.version +#if defined(TEST_121) #include #endif -// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_123 > /dev/null 2> %t/header.experimental_deque -#if defined(TEST_123) +// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_124 > /dev/null 2> %t/header.experimental_deque +#if defined(TEST_124) #include #endif -// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_124 > /dev/null 2> %t/header.experimental_forward_list -#if 
defined(TEST_124) +// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_125 > /dev/null 2> %t/header.experimental_forward_list +#if defined(TEST_125) #include #endif -// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_125 > /dev/null 2> %t/header.experimental_iterator -#if defined(TEST_125) +// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_126 > /dev/null 2> %t/header.experimental_iterator +#if defined(TEST_126) #include #endif -// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_126 > /dev/null 2> %t/header.experimental_list -#if defined(TEST_126) +// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_127 > /dev/null 2> %t/header.experimental_list +#if defined(TEST_127) #include #endif -// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_127 > /dev/null 2> %t/header.experimental_map -#if defined(TEST_127) +// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_128 > /dev/null 2> %t/header.experimental_map +#if defined(TEST_128) #include #endif -// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_128 > /dev/null 2> %t/header.experimental_memory_resource -#if defined(TEST_128) +// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_129 > /dev/null 2> %t/header.experimental_memory_resource +#if defined(TEST_129) #include #endif -// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_129 > /dev/null 2> %t/header.experimental_propagate_const -#if defined(TEST_129) +// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes 
--preprocess -DTEST_130 > /dev/null 2> %t/header.experimental_propagate_const +#if defined(TEST_130) #include #endif -// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_130 > /dev/null 2> %t/header.experimental_regex -#if defined(TEST_130) +// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_131 > /dev/null 2> %t/header.experimental_regex +#if defined(TEST_131) #include #endif -// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_131 > /dev/null 2> %t/header.experimental_set -#if defined(TEST_131) +// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_132 > /dev/null 2> %t/header.experimental_set +#if defined(TEST_132) #include #endif -// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_132 > /dev/null 2> %t/header.experimental_simd -#if defined(TEST_132) +// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_133 > /dev/null 2> %t/header.experimental_simd +#if defined(TEST_133) #include #endif -// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_133 > /dev/null 2> %t/header.experimental_string -#if defined(TEST_133) +// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_134 > /dev/null 2> %t/header.experimental_string +#if defined(TEST_134) #include #endif -// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_134 > /dev/null 2> %t/header.experimental_type_traits -#if defined(TEST_134) +// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_135 > /dev/null 2> %t/header.experimental_type_traits +#if defined(TEST_135) #include #endif -// RUN: %{cxx} %s 
%{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_135 > /dev/null 2> %t/header.experimental_unordered_map -#if defined(TEST_135) +// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_136 > /dev/null 2> %t/header.experimental_unordered_map +#if defined(TEST_136) #include #endif -// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_136 > /dev/null 2> %t/header.experimental_unordered_set -#if defined(TEST_136) +// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_137 > /dev/null 2> %t/header.experimental_unordered_set +#if defined(TEST_137) #include #endif -// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_137 > /dev/null 2> %t/header.experimental_utility -#if defined(TEST_137) +// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_138 > /dev/null 2> %t/header.experimental_utility +#if defined(TEST_138) #include #endif -// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_138 > /dev/null 2> %t/header.experimental_vector -#if defined(TEST_138) +// RUN: %{cxx} %s %{flags} %{compile_flags} --trace-includes -fshow-skipped-includes --preprocess -DTEST_139 > /dev/null 2> %t/header.experimental_vector +#if defined(TEST_139) #include #endif // RUN: %{python} %S/transitive_includes_to_csv.py %t > %t/transitive_includes.csv diff --git a/libcxx/test/libcxx/transitive_includes/cxx03.csv b/libcxx/test/libcxx/transitive_includes/cxx03.csv --- a/libcxx/test/libcxx/transitive_includes/cxx03.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx03.csv @@ -770,6 +770,11 @@ stdexcept cstdlib stdexcept exception stdexcept iosfwd +stop_token atomic +stop_token cstddef +stop_token limits +stop_token thread +stop_token version streambuf cstdint 
streambuf ios streambuf iosfwd diff --git a/libcxx/test/libcxx/transitive_includes/cxx11.csv b/libcxx/test/libcxx/transitive_includes/cxx11.csv --- a/libcxx/test/libcxx/transitive_includes/cxx11.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx11.csv @@ -771,6 +771,11 @@ stdexcept cstdlib stdexcept exception stdexcept iosfwd +stop_token atomic +stop_token cstddef +stop_token limits +stop_token thread +stop_token version streambuf cstdint streambuf ios streambuf iosfwd diff --git a/libcxx/test/libcxx/transitive_includes/cxx14.csv b/libcxx/test/libcxx/transitive_includes/cxx14.csv --- a/libcxx/test/libcxx/transitive_includes/cxx14.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx14.csv @@ -773,6 +773,11 @@ stdexcept cstdlib stdexcept exception stdexcept iosfwd +stop_token atomic +stop_token cstddef +stop_token limits +stop_token thread +stop_token version streambuf cstdint streambuf ios streambuf iosfwd diff --git a/libcxx/test/libcxx/transitive_includes/cxx17.csv b/libcxx/test/libcxx/transitive_includes/cxx17.csv --- a/libcxx/test/libcxx/transitive_includes/cxx17.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx17.csv @@ -773,6 +773,11 @@ stdexcept cstdlib stdexcept exception stdexcept iosfwd +stop_token atomic +stop_token cstddef +stop_token limits +stop_token thread +stop_token version streambuf cstdint streambuf ios streambuf iosfwd diff --git a/libcxx/test/libcxx/transitive_includes/cxx20.csv b/libcxx/test/libcxx/transitive_includes/cxx20.csv --- a/libcxx/test/libcxx/transitive_includes/cxx20.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx20.csv @@ -779,6 +779,11 @@ stdexcept cstdlib stdexcept exception stdexcept iosfwd +stop_token atomic +stop_token cstddef +stop_token limits +stop_token thread +stop_token version streambuf cstdint streambuf ios streambuf iosfwd diff --git a/libcxx/test/libcxx/transitive_includes/cxx2b.csv b/libcxx/test/libcxx/transitive_includes/cxx2b.csv --- a/libcxx/test/libcxx/transitive_includes/cxx2b.csv +++ 
b/libcxx/test/libcxx/transitive_includes/cxx2b.csv @@ -520,6 +520,11 @@ stack initializer_list stack version stdexcept iosfwd +stop_token atomic +stop_token cstddef +stop_token limits +stop_token thread +stop_token version streambuf cstdint streambuf ios streambuf iosfwd diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/stop_token.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/stop_token.version.compile.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/stop_token.version.compile.pass.cpp @@ -0,0 +1,76 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// WARNING: This test was generated by generate_feature_test_macro_components.py +// and should not be edited manually. 
+// +// clang-format off + +// UNSUPPORTED: no-threads, availability-synchronization_library-missing + +// + +// Test the feature test macros defined by + +/* Constant Value + __cpp_lib_jthread 201911L [C++20] +*/ + +#include +#include "test_macros.h" + +#if TEST_STD_VER < 14 + +# ifdef __cpp_lib_jthread +# error "__cpp_lib_jthread should not be defined before c++20" +# endif + +#elif TEST_STD_VER == 14 + +# ifdef __cpp_lib_jthread +# error "__cpp_lib_jthread should not be defined before c++20" +# endif + +#elif TEST_STD_VER == 17 + +# ifdef __cpp_lib_jthread +# error "__cpp_lib_jthread should not be defined before c++20" +# endif + +#elif TEST_STD_VER == 20 + +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_jthread +# error "__cpp_lib_jthread should be defined in c++20" +# endif +# if __cpp_lib_jthread != 201911L +# error "__cpp_lib_jthread should have the value 201911L in c++20" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_jthread +# error "__cpp_lib_jthread should not be defined because it is unimplemented in libc++!" +# endif +# endif + +#elif TEST_STD_VER > 20 + +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_jthread +# error "__cpp_lib_jthread should be defined in c++2b" +# endif +# if __cpp_lib_jthread != 201911L +# error "__cpp_lib_jthread should have the value 201911L in c++2b" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_jthread +# error "__cpp_lib_jthread should not be defined because it is unimplemented in libc++!" +# endif +# endif + +#endif // TEST_STD_VER > 20 + diff --git a/libcxx/test/std/thread/thread.stoptoken/nostopstate/cons.default.pass.cpp b/libcxx/test/std/thread/thread.stoptoken/nostopstate/cons.default.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/thread/thread.stoptoken/nostopstate/cons.default.pass.cpp @@ -0,0 +1,41 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: no-threads +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// XFAIL: availability-synchronization_library-missing + +// struct nostopstate_t { +// explicit nostopstate_t() = default; +// }; +// +// inline constexpr nostopstate_t nostopstate{}; + +#include +#include + +#include "test_macros.h" + +static_assert(std::is_trivially_default_constructible_v); + +struct Empty {}; +static_assert(sizeof(Empty) == sizeof(std::nostopstate_t)); + +template +void conversionTest(T); + +template +concept ImplicitlyDefaultConstructible = requires { conversionTest({}); }; +static_assert(!ImplicitlyDefaultConstructible); + +int main(int, char**) { + [[maybe_unused]] auto x = std::nostopstate; + [[maybe_unused]] auto y = std::nostopstate_t{}; + + return 0; +} diff --git a/libcxx/test/std/thread/thread.stoptoken/stopcallback/cons.const.token.pass.cpp b/libcxx/test/std/thread/thread.stoptoken/stopcallback/cons.const.token.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/thread/thread.stoptoken/stopcallback/cons.const.token.pass.cpp @@ -0,0 +1,236 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: no-threads +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// XFAIL: availability-synchronization_library-missing + +// template +// explicit stop_callback(const stop_token& st, C&& cb) +// noexcept(is_nothrow_constructible_v); + +#include +#include +#include +#include +#include +#include +#include + +#include "make_test_thread.h" +#include "test_macros.h" + +struct Cb { + void operator()() const; +}; + +// Constraints: Callback and C satisfy constructible_from. +static_assert(std::is_constructible_v, const std::stop_token&, void (*)()>); +static_assert(!std::is_constructible_v, const std::stop_token&, void (*)(int)>); +static_assert(std::is_constructible_v, const std::stop_token&, Cb&>); +static_assert(std::is_constructible_v, const std::stop_token&, Cb&>); +static_assert(!std::is_constructible_v, const std::stop_token&, int>); + +// explicit +template +void conversion_test(T); + +template +concept ImplicitlyConstructible = requires(Args&&... 
args) { conversion_test({std::forward(args)...}); }; +static_assert(ImplicitlyConstructible); +static_assert(!ImplicitlyConstructible, const std::stop_token&, Cb>); + +// noexcept +template +struct CbNoExcept { + CbNoExcept(int) noexcept(NoExceptCtor); + void operator()() const; +}; +static_assert(std::is_nothrow_constructible_v>, const std::stop_token&, int>); +static_assert(!std::is_nothrow_constructible_v>, const std::stop_token&, int>); + +int main(int, char**) { + // was requested + { + std::stop_source ss; + const auto st = ss.get_token(); + ss.request_stop(); + + bool called = false; + std::stop_callback sc(st, [&] { called = true; }); + assert(called); + } + + // was not requested + { + std::stop_source ss; + const auto st = ss.get_token(); + + bool called = false; + std::stop_callback sc(st, [&] { called = true; }); + assert(!called); + + ss.request_stop(); + assert(called); + } + + // token has no state + { + std::stop_token st; + bool called = false; + std::stop_callback sc(st, [&] { called = true; }); + assert(!called); + } + + // should not be called multiple times + { + std::stop_source ss; + const auto st = ss.get_token(); + + int calledTimes = 0; + std::stop_callback sc(st, [&] { ++calledTimes; }); + + std::vector threads; + for (auto i = 0; i < 10; ++i) { + threads.emplace_back(support::make_test_thread([&] { ss.request_stop(); })); + } + + for (auto& thread : threads) { + thread.join(); + } + assert(calledTimes == 1); + } + + // adding more callbacks during invoking other callbacks + { + std::stop_source ss; + const auto st = ss.get_token(); + + std::atomic startedFlag = false; + std::atomic finishFlag = false; + std::stop_callback sc(st, [&] { + startedFlag = true; + startedFlag.notify_all(); + finishFlag.wait(false); + }); + + auto thread = support::make_test_thread([&] { ss.request_stop(); }); + + startedFlag.wait(false); + + // first callback is still running, adding another one; + bool secondCallbackCalled = false; + std::stop_callback 
sc2(st, [&] { secondCallbackCalled = true; }); + + finishFlag = true; + finishFlag.notify_all(); + + thread.join(); + assert(secondCallbackCalled); + } + + // adding callbacks on different threads + { + std::stop_source ss; + const auto st = ss.get_token(); + + std::vector threads; + std::atomic callbackCalledTimes = 0; + std::atomic done = false; + for (auto i = 0; i < 10; ++i) { + threads.emplace_back(support::make_test_thread([&] { + std::stop_callback sc{st, [&] { callbackCalledTimes.fetch_add(1, std::memory_order_relaxed); }}; + done.wait(false); + })); + } + using namespace std::chrono_literals; + std::this_thread::sleep_for(1ms); + ss.request_stop(); + done = true; + done.notify_all(); + for (auto& thread : threads) { + thread.join(); + } + assert(callbackCalledTimes.load(std::memory_order_relaxed) == 10); + } + + // correct overload + { + struct CBWithTracking { + bool& lvalueCalled; + bool& lvalueConstCalled; + bool& rvalueCalled; + bool& rvalueConstCalled; + + void operator()() & { lvalueCalled = true; } + void operator()() const& { lvalueConstCalled = true; } + void operator()() && { rvalueCalled = true; } + void operator()() const&& { rvalueConstCalled = true; } + }; + + // RValue + { + bool lvalueCalled = false; + bool lvalueConstCalled = false; + bool rvalueCalled = false; + bool rvalueConstCalled = false; + std::stop_source ss; + const auto st = ss.get_token(); + ss.request_stop(); + + std::stop_callback sc( + st, CBWithTracking{lvalueCalled, lvalueConstCalled, rvalueCalled, rvalueConstCalled}); + assert(rvalueCalled); + } + + // RValue + { + bool lvalueCalled = false; + bool lvalueConstCalled = false; + bool rvalueCalled = false; + bool rvalueConstCalled = false; + std::stop_source ss; + const auto st = ss.get_token(); + ss.request_stop(); + + std::stop_callback sc( + st, CBWithTracking{lvalueCalled, lvalueConstCalled, rvalueCalled, rvalueConstCalled}); + assert(rvalueConstCalled); + } + + // LValue + { + bool lvalueCalled = false; + bool 
lvalueConstCalled = false; + bool rvalueCalled = false; + bool rvalueConstCalled = false; + std::stop_source ss; + const auto st = ss.get_token(); + ss.request_stop(); + CBWithTracking cb{lvalueCalled, lvalueConstCalled, rvalueCalled, rvalueConstCalled}; + std::stop_callback sc(st, cb); + assert(lvalueCalled); + } + + // const LValue + { + bool lvalueCalled = false; + bool lvalueConstCalled = false; + bool rvalueCalled = false; + bool rvalueConstCalled = false; + std::stop_source ss; + const auto st = ss.get_token(); + ss.request_stop(); + CBWithTracking cb{lvalueCalled, lvalueConstCalled, rvalueCalled, rvalueConstCalled}; + std::stop_callback sc(st, cb); + assert(lvalueConstCalled); + } + } + + return 0; +} diff --git a/libcxx/test/std/thread/thread.stoptoken/stopcallback/cons.rvalue.token.pass.cpp b/libcxx/test/std/thread/thread.stoptoken/stopcallback/cons.rvalue.token.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/thread/thread.stoptoken/stopcallback/cons.rvalue.token.pass.cpp @@ -0,0 +1,227 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: no-threads +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// XFAIL: availability-synchronization_library-missing + +// template +// explicit stop_callback(stop_token&& st, C&& cb) +// noexcept(is_nothrow_constructible_v); + +#include +#include +#include +#include +#include +#include +#include + +#include "make_test_thread.h" +#include "test_macros.h" + +struct Cb { + void operator()() const; +}; + +// Constraints: Callback and C satisfy constructible_from. 
+static_assert(std::is_constructible_v, std::stop_token&&, void (*)()>); +static_assert(!std::is_constructible_v, std::stop_token&&, void (*)(int)>); +static_assert(std::is_constructible_v, std::stop_token&&, Cb&>); +static_assert(std::is_constructible_v, std::stop_token&&, Cb&>); +static_assert(!std::is_constructible_v, std::stop_token&&, int>); + +// explicit +template +void conversion_test(T); + +template +concept ImplicitlyConstructible = requires(Args&&... args) { conversion_test({std::forward(args)...}); }; +static_assert(ImplicitlyConstructible); +static_assert(!ImplicitlyConstructible, std::stop_token&&, Cb>); + +// noexcept +template +struct CbNoExcept { + CbNoExcept(int) noexcept(NoExceptCtor); + void operator()() const; +}; +static_assert(std::is_nothrow_constructible_v>, std::stop_token&&, int>); +static_assert(!std::is_nothrow_constructible_v>, std::stop_token&&, int>); + +int main(int, char**) { + // was requested + { + std::stop_source ss; + ss.request_stop(); + + bool called = false; + std::stop_callback sc(ss.get_token(), [&] { called = true; }); + assert(called); + } + + // was not requested + { + std::stop_source ss; + + bool called = false; + std::stop_callback sc(ss.get_token(), [&] { called = true; }); + assert(!called); + + ss.request_stop(); + assert(called); + } + + // token has no state + { + std::stop_token st; + bool called = false; + std::stop_callback sc(std::move(st), [&] { called = true; }); + assert(!called); + } + + // should not be called multiple times + { + std::stop_source ss; + + int calledTimes = 0; + std::stop_callback sc(ss.get_token(), [&] { ++calledTimes; }); + + std::vector threads; + for (auto i = 0; i < 10; ++i) { + threads.emplace_back(support::make_test_thread([&] { ss.request_stop(); })); + } + + for (auto& thread : threads) { + thread.join(); + } + assert(calledTimes == 1); + } + + // adding more callbacks during invoking other callbacks + { + std::stop_source ss; + + std::atomic startedFlag = false; + std::atomic 
finishFlag = false; + std::stop_callback sc(ss.get_token(), [&] { + startedFlag = true; + startedFlag.notify_all(); + finishFlag.wait(false); + }); + + auto thread = support::make_test_thread([&] { ss.request_stop(); }); + + startedFlag.wait(false); + + // first callback is still running, adding another one; + bool secondCallbackCalled = false; + std::stop_callback sc2(ss.get_token(), [&] { secondCallbackCalled = true; }); + + finishFlag = true; + finishFlag.notify_all(); + + thread.join(); + assert(secondCallbackCalled); + } + + // adding callbacks on different threads + { + std::stop_source ss; + + std::vector threads; + std::atomic callbackCalledTimes = 0; + std::atomic done = false; + for (auto i = 0; i < 10; ++i) { + threads.emplace_back(support::make_test_thread([&] { + std::stop_callback sc{ss.get_token(), [&] { callbackCalledTimes.fetch_add(1, std::memory_order_relaxed); }}; + done.wait(false); + })); + } + using namespace std::chrono_literals; + std::this_thread::sleep_for(1ms); + ss.request_stop(); + done = true; + done.notify_all(); + for (auto& thread : threads) { + thread.join(); + } + assert(callbackCalledTimes.load(std::memory_order_relaxed) == 10); + } + + // correct overload + { + struct CBWithTracking { + bool& lvalueCalled; + bool& lvalueConstCalled; + bool& rvalueCalled; + bool& rvalueConstCalled; + + void operator()() & { lvalueCalled = true; } + void operator()() const& { lvalueConstCalled = true; } + void operator()() && { rvalueCalled = true; } + void operator()() const&& { rvalueConstCalled = true; } + }; + + // RValue + { + bool lvalueCalled = false; + bool lvalueConstCalled = false; + bool rvalueCalled = false; + bool rvalueConstCalled = false; + std::stop_source ss; + ss.request_stop(); + + std::stop_callback sc( + ss.get_token(), CBWithTracking{lvalueCalled, lvalueConstCalled, rvalueCalled, rvalueConstCalled}); + assert(rvalueCalled); + } + + // RValue + { + bool lvalueCalled = false; + bool lvalueConstCalled = false; + bool 
rvalueCalled = false; + bool rvalueConstCalled = false; + std::stop_source ss; + ss.request_stop(); + + std::stop_callback sc( + ss.get_token(), CBWithTracking{lvalueCalled, lvalueConstCalled, rvalueCalled, rvalueConstCalled}); + assert(rvalueConstCalled); + } + + // LValue + { + bool lvalueCalled = false; + bool lvalueConstCalled = false; + bool rvalueCalled = false; + bool rvalueConstCalled = false; + std::stop_source ss; + ss.request_stop(); + CBWithTracking cb{lvalueCalled, lvalueConstCalled, rvalueCalled, rvalueConstCalled}; + std::stop_callback sc(ss.get_token(), cb); + assert(lvalueCalled); + } + + // const LValue + { + bool lvalueCalled = false; + bool lvalueConstCalled = false; + bool rvalueCalled = false; + bool rvalueConstCalled = false; + std::stop_source ss; + ss.request_stop(); + CBWithTracking cb{lvalueCalled, lvalueConstCalled, rvalueCalled, rvalueConstCalled}; + std::stop_callback sc(ss.get_token(), cb); + assert(lvalueConstCalled); + } + } + + return 0; +} diff --git a/libcxx/test/std/thread/thread.stoptoken/stopcallback/copy.move.compile.pass.cpp b/libcxx/test/std/thread/thread.stoptoken/stopcallback/copy.move.compile.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/thread/thread.stoptoken/stopcallback/copy.move.compile.pass.cpp @@ -0,0 +1,28 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: no-threads +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// XFAIL: availability-synchronization_library-missing + +// stop_callback(const stop_callback&) = delete; +// stop_callback(stop_callback&&) = delete; +// stop_callback& operator=(const stop_callback&) = delete; +// stop_callback& operator=(stop_callback&&) = delete; + +#include +#include + +struct Callback { + void operator()() const; +}; + +static_assert(!std::is_copy_constructible_v>); +static_assert(!std::is_move_constructible_v>); +static_assert(!std::is_copy_assignable_v>); +static_assert(!std::is_move_assignable_v>); diff --git a/libcxx/test/std/thread/thread.stoptoken/stopcallback/ctad.compile.pass.cpp b/libcxx/test/std/thread/thread.stoptoken/stopcallback/ctad.compile.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/thread/thread.stoptoken/stopcallback/ctad.compile.pass.cpp @@ -0,0 +1,28 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: no-threads +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// XFAIL: availability-synchronization_library-missing + +// template +// stop_callback(stop_token, Callback) -> stop_callback; + +#include +#include +#include + +void test() { + std::stop_token st; + auto a = [] {}; + static_assert(std::is_same_v>); + static_assert(std::is_same_v>); + static_assert(std::is_same_v>); + static_assert( + std::is_same_v>); +} diff --git a/libcxx/test/std/thread/thread.stoptoken/stopcallback/dtor.pass.cpp b/libcxx/test/std/thread/thread.stoptoken/stopcallback/dtor.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/thread/thread.stoptoken/stopcallback/dtor.pass.cpp @@ -0,0 +1,166 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: no-threads +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// XFAIL: availability-synchronization_library-missing + +// ~stop_callback(); + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "make_test_thread.h" +#include "test_macros.h" + +struct CallbackHolder; + +struct DeleteHolder { + CallbackHolder& holder_; + void operator()() const; +}; + +struct CallbackHolder { + std::unique_ptr> callback_; +}; + +void DeleteHolder::operator()() const { holder_.callback_.reset(); } + +int main(int, char**) { + // Unregisters the callback from the owned stop state, if any + { + std::stop_source ss; + bool called = false; + + { + std::stop_callback sc(ss.get_token(), [&] { called = true; }); + } + ss.request_stop(); + assert(!called); + } + + // The destructor does not block waiting for the execution of another + // callback registered by an associated stop_callback. + { + std::stop_source ss; + + std::atomic startedIndex = 0; + std::atomic callbackFinish = false; + + std::optional>> sc1(std::in_place, ss.get_token(), [&] { + startedIndex = 1; + startedIndex.notify_all(); + callbackFinish.wait(false); + }); + + std::optional>> sc2(std::in_place, ss.get_token(), [&] { + startedIndex = 2; + startedIndex.notify_all(); + callbackFinish.wait(false); + }); + + auto thread = support::make_test_thread([&] { ss.request_stop(); }); + + startedIndex.wait(0); + + // now one of the callback has started but not finished. 
+ if (startedIndex == 1) { + sc2.reset(); // destructor should not block + } else if (startedIndex == 2) { + sc1.reset(); // destructor should not block + } else { + assert(false); // something is wrong + } + + callbackFinish = true; + callbackFinish.notify_all(); + thread.join(); + } + + // If callback is concurrently executing on another thread, then the + // return from the invocation of callback strongly happens before ([intro.races]) + // callback is destroyed. + { + struct Callback { + std::atomic& started_; + std::atomic& waitDone_; + std::atomic& finished_; + bool moved = false; + + Callback(std::atomic& started, std::atomic& waitDone, std::atomic& finished) + : started_(started), waitDone_(waitDone), finished_(finished) {} + Callback(Callback&& other) : started_(other.started_), waitDone_(other.waitDone_), finished_(other.finished_) { + other.moved = true; + } + + void operator()() const { + struct ScopedGuard { + std::atomic& g_finished_; + ~ScopedGuard() { g_finished_.store(true, std::memory_order_relaxed); } + }; + + started_ = true; + started_.notify_all(); + waitDone_.wait(false); + ScopedGuard g{finished_}; + } + + ~Callback() { + if (!moved) { + // destructor has to be called after operator() returns + assert(finished_.load(std::memory_order_relaxed)); + } + } + }; + + std::stop_source ss; + + std::atomic started = false; + std::atomic waitDone = false; + std::atomic finished = false; + + std::optional> sc{ + std::in_place, ss.get_token(), Callback{started, waitDone, finished}}; + + auto thread1 = support::make_test_thread([&] { ss.request_stop(); }); + started.wait(false); + + auto thread2 = support::make_test_thread([&] { + using namespace std::chrono_literals; + std::this_thread::sleep_for(1ms); + waitDone = true; + waitDone.notify_all(); + }); + + sc.reset(); // destructor should block until operator() returns, i.e. 
waitDone to be true + + thread1.join(); + thread2.join(); + } + + // If callback is executing on the current thread, then the destructor does not block ([defns.block]) + // waiting for the return from the invocation of callback. + { + std::stop_source ss; + + CallbackHolder holder; + holder.callback_ = std::make_unique>(ss.get_token(), DeleteHolder{holder}); + + assert(holder.callback_ != nullptr); + + ss.request_stop(); // the callbacks deletes itself. if the destructor blocks, it would be deadlock + assert(holder.callback_ == nullptr); + } +} diff --git a/libcxx/test/std/thread/thread.stoptoken/stopcallback/typedef.compile.pass.cpp b/libcxx/test/std/thread/thread.stoptoken/stopcallback/typedef.compile.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/thread/thread.stoptoken/stopcallback/typedef.compile.pass.cpp @@ -0,0 +1,27 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: no-threads +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// XFAIL: availability-synchronization_library-missing + +// using callback_type = _Callback; + +#include +#include + +struct Callback { + void operator()() const; +}; + +static_assert(std::is_same_v::callback_type, Callback>); +static_assert(std::is_same_v::callback_type, const Callback>); +static_assert(std::is_same_v::callback_type, Callback&>); +static_assert(std::is_same_v::callback_type, const Callback&>); +static_assert(std::is_same_v::callback_type, Callback&&>); +static_assert(std::is_same_v::callback_type, const Callback&&>); diff --git a/libcxx/test/std/thread/thread.stoptoken/stopsource/assign.copy.pass.cpp b/libcxx/test/std/thread/thread.stoptoken/stopsource/assign.copy.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/thread/thread.stoptoken/stopsource/assign.copy.pass.cpp @@ -0,0 +1,132 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: no-threads +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// XFAIL: availability-synchronization_library-missing + +// stop_source& operator=(const stop_source& rhs) noexcept; + +#include +#include +#include +#include + +#include "test_macros.h" + +static_assert(std::is_nothrow_copy_assignable_v); + +int main(int, char**) { + // have two different states + { + std::stop_source ss1; + std::stop_source ss2; + + assert(ss1 != ss2); + + ss2.request_stop(); + + assert(!ss1.stop_requested()); + assert(ss2.stop_requested()); + + std::same_as decltype(auto) ref = ss1 = ss2; + assert(&ref == &ss1); + + assert(ss1 == ss2); + assert(ss1.stop_requested()); + assert(ss2.stop_requested()); + } + + // this has no state + { + std::stop_source ss1{std::nostopstate}; + std::stop_source ss2; + + assert(ss1 != ss2); + + ss2.request_stop(); + + assert(!ss1.stop_requested()); + assert(!ss1.stop_possible()); + assert(ss2.stop_requested()); + assert(ss2.stop_possible()); + + std::same_as decltype(auto) ref = ss1 = ss2; + assert(&ref == &ss1); + + assert(ss1 == ss2); + assert(ss1.stop_requested()); + assert(ss1.stop_possible()); + assert(ss2.stop_requested()); + assert(ss2.stop_possible()); + } + + // other has no state + { + std::stop_source ss1; + std::stop_source ss2{std::nostopstate}; + + assert(ss1 != ss2); + + ss1.request_stop(); + + assert(ss1.stop_requested()); + assert(ss1.stop_possible()); + assert(!ss2.stop_requested()); + assert(!ss2.stop_possible()); + + std::same_as decltype(auto) ref = ss1 = ss2; + assert(&ref == &ss1); + + assert(ss1 == ss2); + assert(!ss1.stop_requested()); + assert(!ss1.stop_possible()); + assert(!ss2.stop_requested()); + assert(!ss2.stop_possible()); + } + + // both no state + { + std::stop_source ss1{std::nostopstate}; + std::stop_source ss2{std::nostopstate}; + + assert(ss1 == ss2); + + 
assert(!ss1.stop_requested()); + assert(!ss1.stop_possible()); + assert(!ss2.stop_requested()); + assert(!ss2.stop_possible()); + + std::same_as decltype(auto) ref = ss1 = ss2; + assert(&ref == &ss1); + + assert(ss1 == ss2); + assert(!ss1.stop_requested()); + assert(!ss1.stop_possible()); + assert(!ss2.stop_requested()); + assert(!ss2.stop_possible()); + } + + // self assignment + { + std::stop_source ss; + auto& self = ss; + + assert(!ss.stop_requested()); + + std::same_as decltype(auto) ref = ss = self; + assert(&ref == &ss); + + assert(!ss.stop_requested()); + + ss.request_stop(); + assert(ss.stop_requested()); + } + + return 0; +} diff --git a/libcxx/test/std/thread/thread.stoptoken/stopsource/cons.copy.pass.cpp b/libcxx/test/std/thread/thread.stoptoken/stopsource/cons.copy.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/thread/thread.stoptoken/stopsource/cons.copy.pass.cpp @@ -0,0 +1,68 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: no-threads +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// XFAIL: availability-synchronization_library-missing + +// stop_source(const stop_source&) noexcept; + +#include +#include +#include +#include + +#include "test_macros.h" + +static_assert(std::is_nothrow_copy_constructible_v); + +int main(int, char**) { + { + std::stop_source source; + std::stop_source copy{source}; + + assert(source == copy); + + assert(source.stop_possible()); + assert(!source.stop_requested()); + + assert(copy.stop_possible()); + assert(!copy.stop_requested()); + + source.request_stop(); + assert(source.stop_possible()); + assert(source.stop_requested()); + + assert(copy.stop_possible()); + assert(copy.stop_requested()); + } + + // source counter incremented + { + std::optional source(std::in_place); + auto st = source->get_token(); + assert(st.stop_possible()); + + std::optional copy{source}; + source.reset(); + + assert(st.stop_possible()); + + copy.reset(); + assert(!st.stop_possible()); + } + + // copy from empty + { + std::stop_source ss1{std::nostopstate}; + std::stop_source copy{ss1}; + assert(!copy.stop_possible()); + } + + return 0; +} diff --git a/libcxx/test/std/thread/thread.stoptoken/stopsource/cons.default.pass.cpp b/libcxx/test/std/thread/thread.stoptoken/stopsource/cons.default.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/thread/thread.stoptoken/stopsource/cons.default.pass.cpp @@ -0,0 +1,31 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: no-threads +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// XFAIL: availability-synchronization_library-missing + +// stop_source(); + +#include +#include +#include + +#include "test_macros.h" + +static_assert(std::is_default_constructible_v); + +int main(int, char**) { + { + std::stop_source ss = {}; // implicit + assert(ss.stop_possible()); + assert(!ss.stop_requested()); + } + + return 0; +} diff --git a/libcxx/test/std/thread/thread.stoptoken/stopsource/cons.move.pass.cpp b/libcxx/test/std/thread/thread.stoptoken/stopsource/cons.move.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/thread/thread.stoptoken/stopsource/cons.move.pass.cpp @@ -0,0 +1,49 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: no-threads +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// XFAIL: availability-synchronization_library-missing + +// stop_source(stop_source&&) noexcept; + +#include +#include +#include +#include + +#include "test_macros.h" + +static_assert(std::is_nothrow_move_constructible_v); + +int main(int, char**) { + { + std::stop_source source; + + assert(source.stop_possible()); + assert(!source.stop_requested()); + + std::stop_source source2{std::move(source)}; + + assert(!source.stop_possible()); + assert(!source.stop_requested()); + + assert(source2.stop_possible()); + assert(!source2.stop_requested()); + + source2.request_stop(); + + assert(!source.stop_possible()); + assert(!source.stop_requested()); + + assert(source2.stop_possible()); + assert(source2.stop_requested()); + } + + return 0; +} diff --git a/libcxx/test/std/thread/thread.stoptoken/stopsource/cons.nostopstate.pass.cpp b/libcxx/test/std/thread/thread.stoptoken/stopsource/cons.nostopstate.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/thread/thread.stoptoken/stopsource/cons.nostopstate.pass.cpp @@ -0,0 +1,33 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: no-threads +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// XFAIL: availability-synchronization_library-missing + +// explicit stop_source(nostopstate_t) noexcept; + +#include +#include +#include + +#include "test_macros.h" + +static_assert(std::is_nothrow_constructible_v); +// explicit +static_assert(!std::is_convertible_v); + +int main(int, char**) { + { + std::stop_source ss(std::nostopstate); + assert(!ss.stop_possible()); + assert(!ss.stop_requested()); + } + + return 0; +} diff --git a/libcxx/test/std/thread/thread.stoptoken/stopsource/equals.pass.cpp b/libcxx/test/std/thread/thread.stoptoken/stopsource/equals.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/thread/thread.stoptoken/stopsource/equals.pass.cpp @@ -0,0 +1,64 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: no-threads +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// XFAIL: availability-synchronization_library-missing + +// [[nodiscard]] bool operator==(const stop_source& lhs, const stop_source& rhs) noexcept; +// Returns: true if lhs and rhs have ownership of the same stop state or if both lhs and rhs do not have ownership of a stop state; otherwise false. 
+ +#include +#include +#include +#include + +#include "test_macros.h" + +template +concept IsNoThrowEqualityComparable = requires(const T& t1, const T& t2) { + { t1 == t2 } noexcept; +}; + +static_assert(IsNoThrowEqualityComparable); + +int main(int, char**) { + // both no state + { + const std::stop_source ss1(std::nostopstate); + const std::stop_source ss2(std::nostopstate); + assert(ss1 == ss2); + assert(!(ss1 != ss2)); + } + + // only one has no state + { + const std::stop_source ss1(std::nostopstate); + const std::stop_source ss2; + assert(!(ss1 == ss2)); + assert(ss1 != ss2); + } + + // both has states. same state + { + const std::stop_source ss1; + const std::stop_source ss2(ss1); + assert(ss1 == ss2); + assert(!(ss1 != ss2)); + } + + // both has states. different states + { + const std::stop_source ss1; + const std::stop_source ss2; + assert(!(ss1 == ss2)); + assert(ss1 != ss2); + } + + return 0; +} diff --git a/libcxx/test/std/thread/thread.stoptoken/stopsource/get_token.pass.cpp b/libcxx/test/std/thread/thread.stoptoken/stopsource/get_token.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/thread/thread.stoptoken/stopsource/get_token.pass.cpp @@ -0,0 +1,50 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: no-threads +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// XFAIL: availability-synchronization_library-missing + +// [[nodiscard]] stop_token get_token() const noexcept; + +#include +#include +#include +#include + +#include "test_macros.h" + +template +concept IsGetTokenNoexcept = requires(const T& t) { + { t.get_token() } noexcept; +}; + +static_assert(IsGetTokenNoexcept); + +int main(int, char**) { + // no state + { + std::stop_source ss{std::nostopstate}; + std::same_as decltype(auto) st = ss.get_token(); + assert(!st.stop_possible()); + assert(!st.stop_requested()); + } + + // with state + { + std::stop_source ss; + std::same_as decltype(auto) st = ss.get_token(); + assert(st.stop_possible()); + assert(!st.stop_requested()); + + ss.request_stop(); + assert(st.stop_requested()); + } + + return 0; +} diff --git a/libcxx/test/std/thread/thread.stoptoken/stopsource/move.copy.pass.cpp b/libcxx/test/std/thread/thread.stoptoken/stopsource/move.copy.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/thread/thread.stoptoken/stopsource/move.copy.pass.cpp @@ -0,0 +1,132 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: no-threads +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// XFAIL: availability-synchronization_library-missing + +// stop_source& operator=(stop_source&& rhs) noexcept; + +#include +#include +#include +#include +#include + +#include "test_macros.h" + +static_assert(std::is_nothrow_move_assignable_v); + +int main(int, char**) { + // have two different states + { + std::stop_source ss1; + std::stop_source ss2; + + assert(ss1 != ss2); + + ss2.request_stop(); + + assert(!ss1.stop_requested()); + assert(ss2.stop_requested()); + + std::same_as decltype(auto) ref = ss1 = std::move(ss2); + assert(&ref == &ss1); + + assert(ss1.stop_requested()); + assert(!ss2.stop_possible()); + assert(!ss2.stop_requested()); + } + + // this has no state + { + std::stop_source ss1{std::nostopstate}; + std::stop_source ss2; + + assert(ss1 != ss2); + + ss2.request_stop(); + + assert(!ss1.stop_requested()); + assert(!ss1.stop_possible()); + assert(ss2.stop_requested()); + assert(ss2.stop_possible()); + + std::same_as decltype(auto) ref = ss1 = std::move(ss2); + assert(&ref == &ss1); + + assert(ss1.stop_requested()); + assert(ss1.stop_possible()); + assert(!ss2.stop_requested()); + assert(!ss2.stop_possible()); + } + + // other has no state + { + std::stop_source ss1; + std::stop_source ss2{std::nostopstate}; + + assert(ss1 != ss2); + + ss1.request_stop(); + + assert(ss1.stop_requested()); + assert(ss1.stop_possible()); + assert(!ss2.stop_requested()); + assert(!ss2.stop_possible()); + + std::same_as decltype(auto) ref = ss1 = std::move(ss2); + assert(&ref == &ss1); + + assert(ss1 == ss2); + assert(!ss1.stop_requested()); + assert(!ss1.stop_possible()); + assert(!ss2.stop_requested()); + assert(!ss2.stop_possible()); + } + + // both no state + { + std::stop_source ss1{std::nostopstate}; + std::stop_source ss2{std::nostopstate}; + 
+ assert(ss1 == ss2); + + assert(!ss1.stop_requested()); + assert(!ss1.stop_possible()); + assert(!ss2.stop_requested()); + assert(!ss2.stop_possible()); + + std::same_as decltype(auto) ref = ss1 = std::move(ss2); + assert(&ref == &ss1); + + assert(ss1 == ss2); + assert(!ss1.stop_requested()); + assert(!ss1.stop_possible()); + assert(!ss2.stop_requested()); + assert(!ss2.stop_possible()); + } + + // self assignment + { + std::stop_source ss; + auto& self = ss; + + assert(!ss.stop_requested()); + + std::same_as decltype(auto) ref = ss = std::move(self); + assert(&ref == &ss); + + assert(!ss.stop_requested()); + + ss.request_stop(); + assert(ss.stop_requested()); + } + + return 0; +} diff --git a/libcxx/test/std/thread/thread.stoptoken/stopsource/nodiscard.verify.cpp b/libcxx/test/std/thread/thread.stoptoken/stopsource/nodiscard.verify.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/thread/thread.stoptoken/stopsource/nodiscard.verify.cpp @@ -0,0 +1,26 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: no-threads +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// XFAIL: availability-synchronization_library-missing + +// [[nodiscard]] stop_token get_token() const noexcept; +// [[nodiscard]] bool stop_possible() const noexcept; +// [[nodiscard]] bool stop_requested() const noexcept; +// [[nodiscard]] friend bool operator==(const stop_source& lhs, const stop_source& rhs) noexcept; + +#include + +void test() { + std::stop_source ss; + ss.get_token(); // expected-warning {{ignoring return value of function}} + ss.stop_requested(); // expected-warning {{ignoring return value of function}} + ss.stop_possible(); // expected-warning {{ignoring return value of function}} + operator==(ss, ss); // expected-warning {{ignoring return value of function}} +} diff --git a/libcxx/test/std/thread/thread.stoptoken/stopsource/request_stop.pass.cpp b/libcxx/test/std/thread/thread.stoptoken/stopsource/request_stop.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/thread/thread.stoptoken/stopsource/request_stop.pass.cpp @@ -0,0 +1,79 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: no-threads +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// XFAIL: availability-synchronization_library-missing + +// bool request_stop() noexcept; + +#include +#include +#include +#include +#include +#include + +#include "make_test_thread.h" +#include "test_macros.h" + +template +concept IsRequestStopNoexcept = requires(T& t) { + { t.request_stop() } noexcept; +}; + +static_assert(IsRequestStopNoexcept); + +int main(int, char**) { + // If *this does not have ownership of a stop state, returns false + { + std::stop_source ss{std::nostopstate}; + auto ret = ss.request_stop(); + assert(!ret); + assert(!ss.stop_requested()); + } + + // Otherwise, atomically determines whether the owned stop state has received + // a stop request, and if not, makes a stop request + { + std::stop_source ss; + + auto ret = ss.request_stop(); + assert(ret); + assert(ss.stop_requested()); + } + + // already requested + { + std::stop_source ss; + ss.request_stop(); + assert(ss.stop_requested()); + + auto ret = ss.request_stop(); + assert(!ret); + assert(ss.stop_requested()); + } + + // If the request was made, the callbacks registered by + // associated stop_callback objects are synchronously called. 
+ { + std::stop_source ss; + auto st = ss.get_token(); + + bool cb1Called = false; + bool cb2Called = false; + std::stop_callback sc1(st, [&] { cb1Called = true; }); + std::stop_callback sc2(st, [&] { cb2Called = true; }); + + ss.request_stop(); + assert(cb1Called); + assert(cb2Called); + } + + return 0; +} diff --git a/libcxx/test/std/thread/thread.stoptoken/stopsource/stop_possible.pass.cpp b/libcxx/test/std/thread/thread.stoptoken/stopsource/stop_possible.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/thread/thread.stoptoken/stopsource/stop_possible.pass.cpp @@ -0,0 +1,43 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: no-threads +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// XFAIL: availability-synchronization_library-missing + +// [[nodiscard]] bool stop_possible() const noexcept; +// Returns: true if *this has ownership of a stop state; otherwise, false. 
+ +#include +#include +#include + +#include "test_macros.h" + +template +concept IsStopPossibleNoexcept = requires(const T& t) { + { t.stop_possible() } noexcept; +}; + +static_assert(IsStopPossibleNoexcept); + +int main(int, char**) { + // no state + { + const std::stop_source st{std::nostopstate}; + assert(!st.stop_possible()); + } + + // with state + { + const std::stop_source st; + assert(st.stop_possible()); + } + + return 0; +} diff --git a/libcxx/test/std/thread/thread.stoptoken/stopsource/stop_requested.pass.cpp b/libcxx/test/std/thread/thread.stoptoken/stopsource/stop_requested.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/thread/thread.stoptoken/stopsource/stop_requested.pass.cpp @@ -0,0 +1,105 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: no-threads +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// XFAIL: availability-synchronization_library-missing + +// [[nodiscard]] bool stop_requested() const noexcept; +// true if *this has ownership of a stop state that has received a stop request; otherwise, false. 
+ +#include +#include +#include +#include +#include +#include + +#include "make_test_thread.h" +#include "test_macros.h" + +template +concept IsStopRequestedNoexcept = requires(const T& t) { + { t.stop_requested() } noexcept; +}; + +static_assert(IsStopRequestedNoexcept); + +int main(int, char**) { + // no state + { + const std::stop_source ss{std::nostopstate}; + assert(!ss.stop_requested()); + } + + // has state + { + std::stop_source ss; + assert(!ss.stop_requested()); + + ss.request_stop(); + assert(ss.stop_requested()); + } + + // request from another instance with same state + { + std::stop_source ss1; + auto ss2 = ss1; + ss2.request_stop(); + assert(ss1.stop_requested()); + } + + // request from another instance with different state + { + std::stop_source ss1; + std::stop_source ss2; + + ss2.request_stop(); + assert(!ss1.stop_requested()); + } + + // multiple threads + { + std::stop_source ss; + + std::thread t = support::make_test_thread([&]() { ss.request_stop(); }); + + t.join(); + assert(ss.stop_requested()); + } + + // [thread.stopsource.intro] A call to request_stop that returns true + // synchronizes with a call to stop_requested on an associated stop_source + // or stop_source object that returns true. 
+ { + std::stop_source ss; + + bool flag = false; + + std::thread t = support::make_test_thread([&]() { + using namespace std::chrono_literals; + std::this_thread::sleep_for(1ms); + + // happens-before request_stop + flag = true; + auto b = ss.request_stop(); + assert(b); + }); + + while (!ss.stop_requested()) { + std::this_thread::yield(); + } + + // write should be visible to the current thread + assert(flag == true); + + t.join(); + } + + return 0; +} diff --git a/libcxx/test/std/thread/thread.stoptoken/stopsource/swap.free.pass.cpp b/libcxx/test/std/thread/thread.stoptoken/stopsource/swap.free.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/thread/thread.stoptoken/stopsource/swap.free.pass.cpp @@ -0,0 +1,49 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: no-threads +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// XFAIL: availability-synchronization_library-missing + +// void swap(stop_source& rhs) noexcept; + +#include +#include +#include +#include + +#include "test_macros.h" + +template +concept IsNoThrowFreeSwappable = requires(T& t) { + { swap(t, t) } noexcept; +}; + +static_assert(IsNoThrowFreeSwappable); + +int main(int, char**) { + { + std::stop_source ss1; + std::stop_source ss2; + + assert(ss1 != ss2); + + ss2.request_stop(); + + assert(!ss1.stop_requested()); + assert(ss2.stop_requested()); + + swap(ss1, ss2); + + assert(ss1 != ss2); + assert(ss1.stop_requested()); + assert(!ss2.stop_requested()); + } + + return 0; +} diff --git a/libcxx/test/std/thread/thread.stoptoken/stopsource/swap.member.pass.cpp b/libcxx/test/std/thread/thread.stoptoken/stopsource/swap.member.pass.cpp new file mode 
100644 --- /dev/null +++ b/libcxx/test/std/thread/thread.stoptoken/stopsource/swap.member.pass.cpp @@ -0,0 +1,49 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: no-threads +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// XFAIL: availability-synchronization_library-missing + +// void swap(stop_source& rhs) noexcept; + +#include +#include +#include +#include + +#include "test_macros.h" + +template +concept IsNoThrowMemberSwappable = requires(T& t) { + { t.swap(t) } noexcept; +}; + +static_assert(IsNoThrowMemberSwappable); + +int main(int, char**) { + { + std::stop_source ss1; + std::stop_source ss2; + + assert(ss1 != ss2); + + ss2.request_stop(); + + assert(!ss1.stop_requested()); + assert(ss2.stop_requested()); + + ss1.swap(ss2); + + assert(ss1 != ss2); + assert(ss1.stop_requested()); + assert(!ss2.stop_requested()); + } + + return 0; +} diff --git a/libcxx/test/std/thread/thread.stoptoken/stoptoken/assign.copy.pass.cpp b/libcxx/test/std/thread/thread.stoptoken/stoptoken/assign.copy.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/thread/thread.stoptoken/stoptoken/assign.copy.pass.cpp @@ -0,0 +1,45 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: no-threads +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// XFAIL: availability-synchronization_library-missing + +#include +#include +#include +#include + +#include "test_macros.h" + +static_assert(std::is_nothrow_copy_assignable_v); + +int main(int, char**) { + { + std::stop_token st1; + + std::stop_source source; + auto st2 = source.get_token(); + + assert(st1 != st2); + + source.request_stop(); + + assert(!st1.stop_requested()); + assert(st2.stop_requested()); + + std::same_as decltype(auto) ref = st1 = st2; + assert(&ref == &st1); + + assert(st1 == st2); + assert(st1.stop_requested()); + assert(st2.stop_requested()); + } + + return 0; +} diff --git a/libcxx/test/std/thread/thread.stoptoken/stoptoken/assign.move.pass.cpp b/libcxx/test/std/thread/thread.stoptoken/stoptoken/assign.move.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/thread/thread.stoptoken/stoptoken/assign.move.pass.cpp @@ -0,0 +1,48 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: no-threads +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// XFAIL: availability-synchronization_library-missing + +// stop_token& operator=(stop_token&& rhs) noexcept; + +#include +#include +#include +#include +#include + +#include "test_macros.h" + +static_assert(std::is_nothrow_move_assignable_v); + +int main(int, char**) { + { + std::stop_token st1; + + std::stop_source source; + auto st2 = source.get_token(); + + assert(st1 != st2); + + source.request_stop(); + + assert(!st1.stop_requested()); + assert(st2.stop_requested()); + + std::same_as decltype(auto) ref = st1 = std::move(st2); + assert(&ref == &st1); + + assert(st1 != st2); + assert(st1.stop_requested()); + assert(!st2.stop_requested()); + } + + return 0; +} diff --git a/libcxx/test/std/thread/thread.stoptoken/stoptoken/cons.copy.pass.cpp b/libcxx/test/std/thread/thread.stoptoken/stoptoken/cons.copy.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/thread/thread.stoptoken/stoptoken/cons.copy.pass.cpp @@ -0,0 +1,47 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: no-threads +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// XFAIL: availability-synchronization_library-missing + +// stop_token(const stop_token&) noexcept; + +#include +#include +#include + +#include "test_macros.h" + +static_assert(std::is_nothrow_copy_constructible_v); + +int main(int, char**) { + { + std::stop_source source; + auto st = source.get_token(); + std::stop_token copy{st}; + + assert(st == copy); + + assert(st.stop_possible()); + assert(!st.stop_requested()); + + assert(copy.stop_possible()); + assert(!copy.stop_requested()); + + source.request_stop(); + assert(st.stop_possible()); + assert(st.stop_requested()); + + assert(copy.stop_possible()); + assert(copy.stop_requested()); + + } + + return 0; +} diff --git a/libcxx/test/std/thread/thread.stoptoken/stoptoken/cons.default.pass.cpp b/libcxx/test/std/thread/thread.stoptoken/stoptoken/cons.default.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/thread/thread.stoptoken/stoptoken/cons.default.pass.cpp @@ -0,0 +1,31 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: no-threads +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// XFAIL: availability-synchronization_library-missing + +// stop_token() noexcept; + +#include +#include +#include + +#include "test_macros.h" + +static_assert(std::is_nothrow_default_constructible_v); + +int main(int, char**) { + { + std::stop_token st = {}; // implicit + assert(!st.stop_possible()); + assert(!st.stop_requested()); + } + + return 0; +} diff --git a/libcxx/test/std/thread/thread.stoptoken/stoptoken/cons.move.pass.cpp b/libcxx/test/std/thread/thread.stoptoken/stoptoken/cons.move.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/thread/thread.stoptoken/stoptoken/cons.move.pass.cpp @@ -0,0 +1,51 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: no-threads +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// XFAIL: availability-synchronization_library-missing + +// stop_token(stop_token&&) noexcept; + +#include +#include +#include +#include + +#include "test_macros.h" + +static_assert(std::is_nothrow_move_constructible_v); + +int main(int, char**) { + { + std::stop_source source; + auto st = source.get_token(); + + assert(st.stop_possible()); + assert(!st.stop_requested()); + + std::stop_token st2{std::move(st)}; + + assert(!st.stop_possible()); + assert(!st.stop_requested()); + + assert(st2.stop_possible()); + assert(!st2.stop_requested()); + + source.request_stop(); + + assert(!st.stop_possible()); + assert(!st.stop_requested()); + + assert(st2.stop_possible()); + assert(st2.stop_requested()); + + } + + return 0; +} diff --git a/libcxx/test/std/thread/thread.stoptoken/stoptoken/equals.pass.cpp b/libcxx/test/std/thread/thread.stoptoken/stoptoken/equals.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/thread/thread.stoptoken/stoptoken/equals.pass.cpp @@ -0,0 +1,87 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: no-threads +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// XFAIL: availability-synchronization_library-missing + +// [[nodiscard]] bool operator==(const stop_token& lhs, const stop_token& rhs) noexcept; +// Returns: true if lhs and rhs have ownership of the same stop state or if both lhs and rhs do not have ownership of a stop state; otherwise false. 
+ +// synthesized operator != also tested. + +#include <cassert> +#include <concepts> +#include <stop_token> +#include <type_traits> + +#include "test_macros.h" + +// LWG 3254 is related. +template <class T> +concept IsNoThrowEqualityComparable = requires(const T& t1, const T& t2) { + { t1 == t2 } noexcept; +}; + +template <class T> +concept IsNoThrowInequalityComparable = requires(const T& t1, const T& t2) { + { t1 != t2 } noexcept; +}; + +static_assert(IsNoThrowEqualityComparable<std::stop_token>); +static_assert(IsNoThrowInequalityComparable<std::stop_token>); + +int main(int, char**) { + // both no state + { + const std::stop_token st1; + const std::stop_token st2; + assert(st1 == st2); + assert(!(st1 != st2)); + } + + // only one has no state + { + std::stop_source ss; + const std::stop_token st1; + const auto st2 = ss.get_token(); + assert(!(st1 == st2)); + assert(st1 != st2); + } + + // both have states. same source + { + std::stop_source ss; + const auto st1 = ss.get_token(); + const auto st2 = ss.get_token(); + assert(st1 == st2); + assert(!(st1 != st2)); + } + + // both have states. different sources with same states + { + std::stop_source ss1; + auto ss2 = ss1; + const auto st1 = ss1.get_token(); + const auto st2 = ss2.get_token(); + assert(st1 == st2); + assert(!(st1 != st2)); + } + + // both have states. different sources with different states + { + std::stop_source ss1; + std::stop_source ss2; + const auto st1 = ss1.get_token(); + const auto st2 = ss2.get_token(); + assert(!(st1 == st2)); + assert(st1 != st2); + } + + return 0; +} diff --git a/libcxx/test/std/thread/thread.stoptoken/stoptoken/nodiscard.verify.cpp b/libcxx/test/std/thread/thread.stoptoken/stoptoken/nodiscard.verify.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/thread/thread.stoptoken/stoptoken/nodiscard.verify.cpp @@ -0,0 +1,24 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: no-threads +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// XFAIL: availability-synchronization_library-missing + +// [[nodiscard]] bool stop_requested() const noexcept; +// [[nodiscard]] bool stop_possible() const noexcept; +// [[nodiscard]] friend bool operator==(const stop_token& lhs, const stop_token& rhs) noexcept; + +#include + +void test() { + std::stop_token st; + st.stop_requested(); // expected-warning {{ignoring return value of function}} + st.stop_possible(); // expected-warning {{ignoring return value of function}} + operator==(st, st); // expected-warning {{ignoring return value of function}} +} diff --git a/libcxx/test/std/thread/thread.stoptoken/stoptoken/stop_possible.pass.cpp b/libcxx/test/std/thread/thread.stoptoken/stoptoken/stop_possible.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/thread/thread.stoptoken/stoptoken/stop_possible.pass.cpp @@ -0,0 +1,94 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: no-threads +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// XFAIL: availability-synchronization_library-missing + +// [[nodiscard]] bool stop_possible() const noexcept; +// Returns: false if: +// - *this does not have ownership of a stop state, or +// - a stop request was not made and there are no associated stop_source objects; +// otherwise, true. 
+ +#include <cassert> +#include <concepts> +#include <optional> +#include <stop_token> +#include <thread> + +#include "make_test_thread.h" +#include "test_macros.h" + +template <class T> +concept IsStopPossibleNoexcept = requires(const T& t) { + { t.stop_possible() } noexcept; +}; + +static_assert(IsStopPossibleNoexcept<std::stop_token>); + +int main(int, char**) { + // no state + { + const std::stop_token st; + assert(!st.stop_possible()); + } + + // a stop request was not made and there are no associated stop_source objects + { + std::optional<std::stop_source> ss{std::in_place}; + const auto st = ss->get_token(); + ss.reset(); + + assert(!st.stop_possible()); + } + + // a stop request was not made, but there is an associated stop_source object + { + std::stop_source ss; + const auto st = ss.get_token(); + assert(st.stop_possible()); + } + + // a stop request was made and there are no associated stop_source objects + { + std::optional<std::stop_source> ss{std::in_place}; + const auto st = ss->get_token(); + ss->request_stop(); + ss.reset(); + + assert(st.stop_possible()); + } + + // a stop request was made and there is an associated stop_source object + { + std::stop_source ss; + const auto st = ss.get_token(); + ss.request_stop(); + assert(st.stop_possible()); + } + + // a stop request was made on a different thread and + // there are no associated stop_source objects + { + std::optional<std::stop_source> ss{std::in_place}; + const auto st = ss->get_token(); + + std::thread t = support::make_test_thread([&]() { + ss->request_stop(); + ss.reset(); + }); + + assert(st.stop_possible()); + t.join(); + assert(st.stop_possible()); + + } + + return 0; +} diff --git a/libcxx/test/std/thread/thread.stoptoken/stoptoken/stop_requested.pass.cpp b/libcxx/test/std/thread/thread.stoptoken/stoptoken/stop_requested.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/thread/thread.stoptoken/stoptoken/stop_requested.pass.cpp @@ -0,0 +1,155 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: no-threads +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// XFAIL: availability-synchronization_library-missing + +// [[nodiscard]] bool stop_requested() const noexcept; +// Returns: true if *this has ownership of a stop state that has received a stop request; otherwise, false. + +#include +#include +#include +#include +#include +#include + +#include "make_test_thread.h" +#include "test_macros.h" + +template +concept IsStopRequestedNoexcept = requires(const T& t) { + { t.stop_requested() } noexcept; +}; + +static_assert(IsStopRequestedNoexcept); + +int main(int, char**) { + // no state + { + const std::stop_token st; + assert(!st.stop_requested()); + } + + // has state + { + std::stop_source ss; + const auto st = ss.get_token(); + assert(!st.stop_requested()); + + ss.request_stop(); + assert(st.stop_requested()); + } + + // already requested before constructor + { + std::stop_source ss; + ss.request_stop(); + const auto st = ss.get_token(); + assert(st.stop_requested()); + } + + // stop_token should share the state + { + std::optional ss{std::in_place}; + ss->request_stop(); + const auto st = ss->get_token(); + + ss.reset(); + assert(st.stop_requested()); + } + + // single stop_source, multiple stop_token + { + std::stop_source ss; + const auto st1 = ss.get_token(); + const auto st2 = ss.get_token(); + assert(!st1.stop_requested()); + assert(!st2.stop_requested()); + + ss.request_stop(); + assert(st1.stop_requested()); + assert(st2.stop_requested()); + } + + // multiple stop_source, multiple stop_token + { + std::stop_source ss1; + std::stop_source ss2; + + const auto st1 = ss1.get_token(); + const auto st2 = ss2.get_token(); + assert(!st1.stop_requested()); + assert(!st2.stop_requested()); + + ss1.request_stop(); + 
assert(st1.stop_requested()); + assert(!st2.stop_requested()); + } + + // multiple threads + { + std::stop_source ss; + const auto st = ss.get_token(); + assert(!st.stop_requested()); + + std::thread t = support::make_test_thread([&]() { ss.request_stop(); }); + + t.join(); + assert(st.stop_requested()); + } + + // maybe concurrent calls + { + std::stop_source ss; + const auto st = ss.get_token(); + assert(!st.stop_requested()); + + std::thread t = support::make_test_thread([&]() { ss.request_stop(); }); + + while (!st.stop_requested()) { + // should eventually exit the loop + std::this_thread::yield(); + } + + t.join(); + } + + // [thread.stoptoken.intro] A call to request_stop that returns true + // synchronizes with a call to stop_requested on an associated stop_token + // or stop_source object that returns true. + { + std::stop_source ss; + const auto st = ss.get_token(); + assert(!st.stop_requested()); + + bool flag = false; + + std::thread t = support::make_test_thread([&]() { + using namespace std::chrono_literals; + std::this_thread::sleep_for(1ms); + + // happens-before request_stop + flag = true; + auto b = ss.request_stop(); + assert(b); + }); + + while (!st.stop_requested()) { + std::this_thread::yield(); + } + + // write should be visible to the current thread + assert(flag == true); + + t.join(); + } + + return 0; +} diff --git a/libcxx/test/std/thread/thread.stoptoken/stoptoken/swap.free.pass.cpp b/libcxx/test/std/thread/thread.stoptoken/stoptoken/swap.free.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/thread/thread.stoptoken/stoptoken/swap.free.pass.cpp @@ -0,0 +1,51 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: no-threads +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// XFAIL: availability-synchronization_library-missing + +// friend void swap(stop_token& x, stop_token& y) noexcept; + +#include +#include +#include +#include + +#include "test_macros.h" + +template +concept IsNoThrowFreeSwappable = requires(T& t) { + { swap(t, t) } noexcept; +}; + +static_assert(IsNoThrowFreeSwappable); + +int main(int, char**) { + { + std::stop_token st1; + + std::stop_source source; + auto st2 = source.get_token(); + + assert(st1 != st2); + + source.request_stop(); + + assert(!st1.stop_requested()); + assert(st2.stop_requested()); + + swap(st1, st2); + + assert(st1 != st2); + assert(st1.stop_requested()); + assert(!st2.stop_requested()); + } + + return 0; +} diff --git a/libcxx/test/std/thread/thread.stoptoken/stoptoken/swap.member.pass.cpp b/libcxx/test/std/thread/thread.stoptoken/stoptoken/swap.member.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/thread/thread.stoptoken/stoptoken/swap.member.pass.cpp @@ -0,0 +1,51 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: no-threads +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// XFAIL: availability-synchronization_library-missing + +// void swap(stop_token& rhs) noexcept; + +#include +#include +#include +#include + +#include "test_macros.h" + +template +concept IsNoThrowMemberSwappable = requires(T& t) { + { t.swap(t) } noexcept; +}; + +static_assert(IsNoThrowMemberSwappable); + +int main(int, char**) { + { + std::stop_token st1; + + std::stop_source source; + auto st2 = source.get_token(); + + assert(st1 != st2); + + source.request_stop(); + + assert(!st1.stop_requested()); + assert(st2.stop_requested()); + + st1.swap(st2); + + assert(st1 != st2); + assert(st1.stop_requested()); + assert(!st2.stop_requested()); + } + + return 0; +} diff --git a/libcxx/utils/data/ignore_format.txt b/libcxx/utils/data/ignore_format.txt --- a/libcxx/utils/data/ignore_format.txt +++ b/libcxx/utils/data/ignore_format.txt @@ -250,6 +250,7 @@ libcxx/include/__chrono/monthday.h libcxx/include/__chrono/month.h libcxx/include/__chrono/month_weekday.h +libcxx/include/__chrono/statically_widen.h libcxx/include/__chrono/steady_clock.h libcxx/include/__chrono/system_clock.h libcxx/include/__chrono/time_point.h @@ -307,8 +308,10 @@ libcxx/include/__debug_utils/randomize_range.h libcxx/include/deque libcxx/include/errno.h +libcxx/include/__exception/exception_ptr.h libcxx/include/expected libcxx/include/__expected/expected.h +libcxx/include/__expected/unexpected.h libcxx/include/experimental/__config libcxx/include/experimental/iterator libcxx/include/experimental/map @@ -488,7 +491,6 @@ libcxx/include/new libcxx/include/__node_handle libcxx/include/numbers -libcxx/include/numeric libcxx/include/__numeric/accumulate.h libcxx/include/__numeric/adjacent_difference.h libcxx/include/__numeric/exclusive_scan.h @@ -504,6 +506,7 @@ 
libcxx/include/__numeric/transform_reduce.h libcxx/include/optional libcxx/include/ostream +libcxx/include/__pstl/internal/algorithm_impl.h libcxx/include/__pstl/internal/numeric_impl.h libcxx/include/__pstl/internal/omp/parallel_for_each.h libcxx/include/__pstl/internal/omp/parallel_for.h @@ -625,6 +628,9 @@ libcxx/include/__support/win32/locale_win32.h libcxx/include/__support/xlocale/__nop_locale_mgmt.h libcxx/include/__system_error/errc.h +libcxx/include/__system_error/error_category.h +libcxx/include/__system_error/error_code.h +libcxx/include/__system_error/error_condition.h libcxx/include/thread libcxx/include/__threading_support libcxx/include/__thread/poll_with_backoff.h @@ -771,6 +777,7 @@ libcxx/include/__utility/piecewise_construct.h libcxx/include/__utility/priority_tag.h libcxx/include/__utility/rel_ops.h +libcxx/include/__utility/terminate_on_exception.h libcxx/include/__utility/to_underlying.h libcxx/include/__utility/unreachable.h libcxx/include/valarray diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py --- a/libcxx/utils/generate_feature_test_macro_components.py +++ b/libcxx/utils/generate_feature_test_macro_components.py @@ -815,6 +815,7 @@ "semaphore": ["UNSUPPORTED: no-threads"], "shared_mutex": ["UNSUPPORTED: no-threads"], "stdatomic.h": ["UNSUPPORTED: no-threads"], + "stop_token": ["UNSUPPORTED: no-threads, availability-synchronization_library-missing"], "thread": ["UNSUPPORTED: no-threads"], } diff --git a/libcxx/utils/generate_header_inclusion_tests.py b/libcxx/utils/generate_header_inclusion_tests.py --- a/libcxx/utils/generate_header_inclusion_tests.py +++ b/libcxx/utils/generate_header_inclusion_tests.py @@ -73,6 +73,7 @@ "initializer_list": "11", "optional": "17", "ranges": "20", + "stop_token": "20", "string_view": "17", "syncstream": "20", "system_error": "11", diff --git a/libcxx/utils/generate_header_tests.py b/libcxx/utils/generate_header_tests.py --- 
a/libcxx/utils/generate_header_tests.py +++ b/libcxx/utils/generate_header_tests.py @@ -15,6 +15,7 @@ "semaphore": "!defined(_LIBCPP_HAS_NO_THREADS)", "shared_mutex": "!defined(_LIBCPP_HAS_NO_THREADS)", "stdatomic.h": "__cplusplus > 202002L && !defined(_LIBCPP_HAS_NO_THREADS)", + "stop_token": "!defined(_LIBCPP_HAS_NO_THREADS) && defined(_LIBCPP_AVAILABILITY_SYNC)", "thread": "!defined(_LIBCPP_HAS_NO_THREADS)", "filesystem": "!defined(_LIBCPP_HAS_NO_FILESYSTEM_LIBRARY)", diff --git a/lldb/include/lldb/Symbol/SymbolContext.h b/lldb/include/lldb/Symbol/SymbolContext.h --- a/lldb/include/lldb/Symbol/SymbolContext.h +++ b/lldb/include/lldb/Symbol/SymbolContext.h @@ -250,8 +250,8 @@ /// For C++ the name is "this", for Objective-C the name is "self". /// /// \return - /// Returns a string for the name of the instance variable. - ConstString GetInstanceVariableName(); + /// Returns a StringRef for the name of the instance variable. + llvm::StringRef GetInstanceVariableName(); /// Sorts the types in TypeMap according to SymbolContext to TypeList /// diff --git a/lldb/include/lldb/Target/Language.h b/lldb/include/lldb/Target/Language.h --- a/lldb/include/lldb/Target/Language.h +++ b/lldb/include/lldb/Target/Language.h @@ -326,7 +326,7 @@ return ConstString(); } - virtual ConstString GetInstanceVariableName() { return {}; } + virtual llvm::StringRef GetInstanceVariableName() { return {}; } protected: // Classes that inherit from Language can see and modify these diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h --- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h +++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h @@ -165,7 +165,7 @@ ConstString FindBestAlternateFunctionMangledName( const Mangled mangled, const SymbolContext &sym_ctx) const override; - ConstString GetInstanceVariableName() override { return ConstString("this"); } + llvm::StringRef 
GetInstanceVariableName() override { return "this"; } // PluginInterface protocol llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); } diff --git a/lldb/source/Plugins/Language/ObjC/ObjCLanguage.h b/lldb/source/Plugins/Language/ObjC/ObjCLanguage.h --- a/lldb/source/Plugins/Language/ObjC/ObjCLanguage.h +++ b/lldb/source/Plugins/Language/ObjC/ObjCLanguage.h @@ -191,7 +191,7 @@ return false; } - ConstString GetInstanceVariableName() override { return ConstString("self"); } + llvm::StringRef GetInstanceVariableName() override { return "self"; } // PluginInterface protocol llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); } diff --git a/lldb/source/Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.h b/lldb/source/Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.h --- a/lldb/source/Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.h +++ b/lldb/source/Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.h @@ -40,7 +40,7 @@ static lldb_private::Language *CreateInstance(lldb::LanguageType language); - ConstString GetInstanceVariableName() override { return ConstString("self"); } + llvm::StringRef GetInstanceVariableName() override { return "self"; } static llvm::StringRef GetPluginNameStatic() { return "objcplusplus"; } diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -5115,6 +5115,7 @@ case clang::BuiltinType::RvvBool16: case clang::BuiltinType::RvvBool32: case clang::BuiltinType::RvvBool64: + case clang::BuiltinType::RvvInt32m1x2: break; // WebAssembly builtin types. 
diff --git a/lldb/source/Symbol/SymbolContext.cpp b/lldb/source/Symbol/SymbolContext.cpp --- a/lldb/source/Symbol/SymbolContext.cpp +++ b/lldb/source/Symbol/SymbolContext.cpp @@ -541,7 +541,7 @@ return nullptr; } -ConstString SymbolContext::GetInstanceVariableName() { +llvm::StringRef SymbolContext::GetInstanceVariableName() { LanguageType lang_type = eLanguageTypeUnknown; if (Block *function_block = GetFunctionBlock()) diff --git a/lldb/source/Target/StackFrame.cpp b/lldb/source/Target/StackFrame.cpp --- a/lldb/source/Target/StackFrame.cpp +++ b/lldb/source/Target/StackFrame.cpp @@ -567,8 +567,9 @@ // Check for direct ivars access which helps us with implicit access to // ivars using "this" or "self". GetSymbolContext(eSymbolContextFunction | eSymbolContextBlock); - if (auto instance_var_name = m_sc.GetInstanceVariableName()) { - var_sp = variable_list->FindVariable(instance_var_name); + llvm::StringRef instance_var_name = m_sc.GetInstanceVariableName(); + if (!instance_var_name.empty()) { + var_sp = variable_list->FindVariable(ConstString(instance_var_name)); if (var_sp) { separator_idx = 0; if (Type *var_type = var_sp->GetType()) diff --git a/lldb/test/API/functionalities/bt-interrupt/TestInterruptBacktrace.py b/lldb/test/API/functionalities/bt-interrupt/TestInterruptBacktrace.py --- a/lldb/test/API/functionalities/bt-interrupt/TestInterruptBacktrace.py +++ b/lldb/test/API/functionalities/bt-interrupt/TestInterruptBacktrace.py @@ -6,12 +6,13 @@ import lldb import lldbsuite.test.lldbutil as lldbutil from lldbsuite.test.lldbtest import * - +from lldbsuite.test.decorators import * class TestInterruptingBacktrace(TestBase): NO_DEBUG_INFO_TESTCASE = True + @skipIf(oslist=["linux"], archs=["arm"]) def test_backtrace_interrupt(self): """Use RequestInterrupt followed by stack operations to ensure correct interrupt behavior for stacks.""" diff --git a/llvm/CODE_OWNERS.TXT b/llvm/CODE_OWNERS.TXT --- a/llvm/CODE_OWNERS.TXT +++ b/llvm/CODE_OWNERS.TXT @@ -160,8 +160,8 @@ E: 
david.majnemer@gmail.com D: IR Constant Folder, InstCombine -N: Dylan McKay -E: me@dylanmckay.io +N: Ben Shi +E: 2283975856@qq.com, powerman1st@163.com D: AVR Backend N: Kazushi Marukawa diff --git a/llvm/include/llvm/ADT/PostOrderIterator.h b/llvm/include/llvm/ADT/PostOrderIterator.h --- a/llvm/include/llvm/ADT/PostOrderIterator.h +++ b/llvm/include/llvm/ADT/PostOrderIterator.h @@ -106,13 +106,14 @@ using NodeRef = typename GT::NodeRef; using ChildItTy = typename GT::ChildIteratorType; - // VisitStack - Used to maintain the ordering. Top = current block - // First element is basic block pointer, second is the 'next child' to visit - SmallVector, 8> VisitStack; + /// Used to maintain the ordering. + /// First element is basic block pointer, second is iterator for the next + /// child to visit, third is the end iterator. + SmallVector, 8> VisitStack; po_iterator(NodeRef BB) { this->insertEdge(std::optional(), BB); - VisitStack.push_back(std::make_pair(BB, GT::child_begin(BB))); + VisitStack.emplace_back(BB, GT::child_begin(BB), GT::child_end(BB)); traverseChild(); } @@ -121,7 +122,7 @@ po_iterator(NodeRef BB, SetType &S) : po_iterator_storage(S) { if (this->insertEdge(std::optional(), BB)) { - VisitStack.push_back(std::make_pair(BB, GT::child_begin(BB))); + VisitStack.emplace_back(BB, GT::child_begin(BB), GT::child_end(BB)); traverseChild(); } } @@ -131,12 +132,14 @@ } // End is when stack is empty. void traverseChild() { - while (VisitStack.back().second != GT::child_end(VisitStack.back().first)) { - NodeRef BB = *VisitStack.back().second++; - if (this->insertEdge(std::optional(VisitStack.back().first), - BB)) { + while (true) { + auto &[ParentBB, It, End] = VisitStack.back(); + if (It == End) + break; + NodeRef BB = *It++; + if (this->insertEdge(std::optional(ParentBB), BB)) { // If the block is not visited... 
- VisitStack.push_back(std::make_pair(BB, GT::child_begin(BB))); + VisitStack.emplace_back(BB, GT::child_begin(BB), GT::child_end(BB)); } } } @@ -158,7 +161,7 @@ } bool operator!=(const po_iterator &x) const { return !(*this == x); } - const NodeRef &operator*() const { return VisitStack.back().first; } + const NodeRef &operator*() const { return std::get<0>(VisitStack.back()); } // This is a nonstandard operator-> that dereferences the pointer an extra // time... so that you can actually call methods ON the BasicBlock, because @@ -167,7 +170,7 @@ NodeRef operator->() const { return **this; } po_iterator &operator++() { // Preincrement - this->finishPostorder(VisitStack.back().first); + this->finishPostorder(std::get<0>(VisitStack.back())); VisitStack.pop_back(); if (!VisitStack.empty()) traverseChild(); diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -320,9 +320,11 @@ /// Estimate the cost of a chain of pointers (typically pointer operands of a /// chain of loads or stores within same block) operations set when lowered. + /// \p AccessTy is the type of the loads/stores that will ultimately use the + /// \p Ptrs. 
InstructionCost getPointersChainCost(ArrayRef Ptrs, const Value *Base, - const PointersChainInfo &Info, + const PointersChainInfo &Info, Type *AccessTy, TargetCostKind CostKind = TTI::TCK_RecipThroughput ) const; @@ -1663,7 +1665,7 @@ TTI::TargetCostKind CostKind) = 0; virtual InstructionCost getPointersChainCost(ArrayRef Ptrs, const Value *Base, - const TTI::PointersChainInfo &Info, + const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind) = 0; virtual unsigned getInliningThresholdMultiplier() = 0; virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0; @@ -2024,8 +2026,9 @@ InstructionCost getPointersChainCost(ArrayRef Ptrs, const Value *Base, const PointersChainInfo &Info, + Type *AccessTy, TargetCostKind CostKind) override { - return Impl.getPointersChainCost(Ptrs, Base, Info, CostKind); + return Impl.getPointersChainCost(Ptrs, Base, Info, AccessTy, CostKind); } unsigned getInliningThresholdMultiplier() override { return Impl.getInliningThresholdMultiplier(); diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -1041,6 +1041,7 @@ InstructionCost getPointersChainCost(ArrayRef Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, + Type *AccessTy, TTI::TargetCostKind CostKind) { InstructionCost Cost = TTI::TCC_Free; // In the basic model we take into account GEP instructions only diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -356,8 +356,19 @@ } void fabs() { - KnownFPClasses &= (fcPositive | fcNan); - SignBit = false; + if (KnownFPClasses & fcNegZero) + KnownFPClasses |= fcPosZero; + + if (KnownFPClasses & fcNegInf) + KnownFPClasses |= fcPosInf; + + if (KnownFPClasses & 
fcNegSubnormal) + KnownFPClasses |= fcPosSubnormal; + + if (KnownFPClasses & fcNegNormal) + KnownFPClasses |= fcPosNormal; + + signBitMustBeZero(); } /// Return true if the sign bit must be 0, ignoring the sign of nans. diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -2107,12 +2107,12 @@ // Reinterpreting data // -def int_aarch64_sve_convert_from_svbool : DefaultAttrsIntrinsic<[llvm_anyvector_ty], +def int_aarch64_sve_convert_from_svbool : DefaultAttrsIntrinsic<[llvm_any_ty], [llvm_nxv16i1_ty], [IntrNoMem]>; def int_aarch64_sve_convert_to_svbool : DefaultAttrsIntrinsic<[llvm_nxv16i1_ty], - [llvm_anyvector_ty], + [llvm_any_ty], [IntrNoMem]>; // @@ -2600,6 +2600,46 @@ def int_aarch64_sve_bfdot_lane_v2 : SVE_4Vec_BF16_Indexed; def int_aarch64_sve_bfmlalb_lane_v2 : SVE_4Vec_BF16_Indexed; def int_aarch64_sve_bfmlalt_lane_v2 : SVE_4Vec_BF16_Indexed; + +// +// SVE2.1 - Contiguous loads to multiple consecutive vectors +// + + class SVE2p1_Load_PN_X2_Intrinsic + : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], + [llvm_aarch64_svcount_ty, llvm_ptr_ty], + [IntrReadMem, IntrArgMemOnly]>; + + class SVE2p1_Load_PN_X4_Intrinsic + : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, + LLVMMatchType<0>, LLVMMatchType<0>], + [llvm_aarch64_svcount_ty, llvm_ptr_ty], + [IntrReadMem, IntrArgMemOnly]>; + +def int_aarch64_sve_ld1_pn_x2 : SVE2p1_Load_PN_X2_Intrinsic; +def int_aarch64_sve_ld1_pn_x4 : SVE2p1_Load_PN_X4_Intrinsic; +def int_aarch64_sve_ldnt1_pn_x2 : SVE2p1_Load_PN_X2_Intrinsic; +def int_aarch64_sve_ldnt1_pn_x4 : SVE2p1_Load_PN_X4_Intrinsic; + +// +// SVE2.1 - Contiguous stores to multiple consecutive vectors +// + + class SVE2p1_Store_PN_X2_Intrinsic + : DefaultAttrsIntrinsic<[], [ llvm_anyvector_ty, LLVMMatchType<0>, + llvm_aarch64_svcount_ty, llvm_ptr_ty ], + [IntrWriteMem, IntrArgMemOnly]>; + + class 
SVE2p1_Store_PN_X4_Intrinsic + : DefaultAttrsIntrinsic<[], [ llvm_anyvector_ty, LLVMMatchType<0>, + LLVMMatchType<0>, LLVMMatchType<0>, + llvm_aarch64_svcount_ty, llvm_ptr_ty], + [IntrWriteMem, IntrArgMemOnly]>; + +def int_aarch64_sve_st1_pn_x2 : SVE2p1_Store_PN_X2_Intrinsic; +def int_aarch64_sve_st1_pn_x4 : SVE2p1_Store_PN_X4_Intrinsic; +def int_aarch64_sve_stnt1_pn_x2 : SVE2p1_Store_PN_X2_Intrinsic; +def int_aarch64_sve_stnt1_pn_x4 : SVE2p1_Store_PN_X4_Intrinsic; } // @@ -2752,9 +2792,9 @@ // def int_aarch64_sve_psel - : DefaultAttrsIntrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, - LLVMMatchType<0>, llvm_i32_ty], + : DefaultAttrsIntrinsic<[llvm_nxv16i1_ty], + [llvm_nxv16i1_ty, + llvm_anyvector_ty, llvm_i32_ty], [IntrNoMem]>; // @@ -2926,6 +2966,21 @@ LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; + class SVE2_VG2_Sel_Intrinsic + : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], + [llvm_aarch64_svcount_ty, LLVMMatchType<0>, + LLVMMatchType<0>, LLVMMatchType<0>, + LLVMMatchType<0>], [IntrNoMem]>; + + class SVE2_VG4_Sel_Intrinsic + : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, + LLVMMatchType<0>, LLVMMatchType<0>], + [llvm_aarch64_svcount_ty, LLVMMatchType<0>, + LLVMMatchType<0>, LLVMMatchType<0>, + LLVMMatchType<0>, LLVMMatchType<0>, + LLVMMatchType<0>, LLVMMatchType<0>, + LLVMMatchType<0>], [IntrNoMem]>; + class SME2_CVT_VG2_SINGLE_Intrinsic : DefaultAttrsIntrinsic<[LLVMSubdivide2VectorType<0>], [llvm_anyvector_ty, LLVMMatchType<0>], @@ -3385,4 +3440,9 @@ def int_aarch64_sve_uunpk_x2 : SME2_VG2_Unpk_Intrinsic; def int_aarch64_sve_sunpk_x4 : SME2_VG4_Unpk_Intrinsic; def int_aarch64_sve_uunpk_x4 : SME2_VG4_Unpk_Intrinsic; + + // 2-way and 4-way vector selects + def int_aarch64_sve_sel_x2 : SVE2_VG2_Sel_Intrinsic; + def int_aarch64_sve_sel_x4 : SVE2_VG4_Sel_Intrinsic; + } diff --git a/llvm/include/llvm/MCA/CustomBehaviour.h b/llvm/include/llvm/MCA/CustomBehaviour.h --- a/llvm/include/llvm/MCA/CustomBehaviour.h +++ 
b/llvm/include/llvm/MCA/CustomBehaviour.h @@ -133,7 +133,7 @@ StringRef getData() const { return Data; } }; -using SharedInstrument = std::shared_ptr; +using UniqueInstrument = std::unique_ptr; /// This class allows targets to optionally customize the logic that resolves /// scheduling class IDs. Targets can use information encoded in Instrument @@ -156,8 +156,8 @@ // Instrument.Desc equal to Type virtual bool supportsInstrumentType(StringRef Type) const { return false; } - /// Allocate an Instrument, and return a shared pointer to it. - virtual SharedInstrument createInstrument(StringRef Desc, StringRef Data); + /// Allocate an Instrument, and return a unique pointer to it. + virtual UniqueInstrument createInstrument(StringRef Desc, StringRef Data); /// Given an MCInst and a vector of Instrument, a target can /// return a SchedClassID. This can be used by a subtarget to return a @@ -165,9 +165,8 @@ /// BaseInstruction This can be useful when a BaseInstruction does not convey /// the correct scheduling information without additional data. By default, /// it returns the SchedClassID that belongs to MCI. 
- virtual unsigned - getSchedClassID(const MCInstrInfo &MCII, const MCInst &MCI, - const SmallVector &IVec) const; + virtual unsigned getSchedClassID(const MCInstrInfo &MCII, const MCInst &MCI, + const SmallVector &IVec) const; }; } // namespace mca diff --git a/llvm/include/llvm/MCA/InstrBuilder.h b/llvm/include/llvm/MCA/InstrBuilder.h --- a/llvm/include/llvm/MCA/InstrBuilder.h +++ b/llvm/include/llvm/MCA/InstrBuilder.h @@ -84,11 +84,10 @@ InstRecycleCallback InstRecycleCB; Expected - createInstrDescImpl(const MCInst &MCI, - const SmallVector &IVec); + createInstrDescImpl(const MCInst &MCI, const SmallVector &IVec); Expected getOrCreateInstrDesc(const MCInst &MCI, - const SmallVector &IVec); + const SmallVector &IVec); InstrBuilder(const InstrBuilder &) = delete; InstrBuilder &operator=(const InstrBuilder &) = delete; @@ -114,8 +113,7 @@ void setInstRecycleCallback(InstRecycleCallback CB) { InstRecycleCB = CB; } Expected> - createInstruction(const MCInst &MCI, - const SmallVector &IVec); + createInstruction(const MCInst &MCI, const SmallVector &IVec); }; } // namespace mca } // namespace llvm diff --git a/llvm/include/llvm/Pass.h b/llvm/include/llvm/Pass.h --- a/llvm/include/llvm/Pass.h +++ b/llvm/include/llvm/Pass.h @@ -28,6 +28,9 @@ #ifndef LLVM_PASS_H #define LLVM_PASS_H +#ifdef EXPENSIVE_CHECKS +#include +#endif #include namespace llvm { diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h --- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h +++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h @@ -48,9 +48,9 @@ #ifndef LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H #define LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/InlineCost.h" -#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Transforms/Scalar/SCCP.h" 
#include "llvm/Transforms/Utils/Cloning.h" @@ -126,6 +126,7 @@ FunctionAnalysisManager *FAM; /// Analyses used to help determine if a function should be specialized. + std::function GetBFI; std::function GetTLI; std::function GetTTI; std::function GetAC; @@ -137,11 +138,12 @@ public: FunctionSpecializer( SCCPSolver &Solver, Module &M, FunctionAnalysisManager *FAM, + std::function GetBFI, std::function GetTLI, std::function GetTTI, std::function GetAC) - : Solver(Solver), M(M), FAM(FAM), GetTLI(GetTLI), GetTTI(GetTTI), - GetAC(GetAC) {} + : Solver(Solver), M(M), FAM(FAM), GetBFI(GetBFI), GetTLI(GetTLI), + GetTTI(GetTTI), GetAC(GetAC) {} ~FunctionSpecializer(); @@ -193,7 +195,7 @@ Cost getSpecializationCost(Function *F); /// Compute a bonus for replacing argument \p A with constant \p C. - Cost getSpecializationBonus(Argument *A, Constant *C, const LoopInfo &LI); + Cost getSpecializationBonus(Argument *A, Constant *C); /// Determine if it is possible to specialise the function for constant values /// of the formal parameter \p A. diff --git a/llvm/include/llvm/Transforms/Utils/SCCPSolver.h b/llvm/include/llvm/Transforms/Utils/SCCPSolver.h --- a/llvm/include/llvm/Transforms/Utils/SCCPSolver.h +++ b/llvm/include/llvm/Transforms/Utils/SCCPSolver.h @@ -44,7 +44,6 @@ std::unique_ptr PredInfo; DominatorTree *DT; PostDominatorTree *PDT; - LoopInfo *LI; }; /// Helper struct shared between Function Specialization and SCCP Solver. 
@@ -91,8 +90,6 @@ const PredicateBase *getPredicateInfoFor(Instruction *I); - const LoopInfo &getLoopInfo(Function &F); - DomTreeUpdater getDTU(Function &F); /// trackValueOfGlobalVariable - Clients can use this method to diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -135,10 +135,10 @@ #define DEBUG_TYPE "scalar-evolution" -STATISTIC(NumTripCountsComputed, - "Number of loops with predictable loop counts"); -STATISTIC(NumTripCountsNotComputed, - "Number of loops without predictable loop counts"); +STATISTIC(NumExitCountsComputed, + "Number of loop exits with predictable exit counts"); +STATISTIC(NumExitCountsNotComputed, + "Number of loop exits without predictable exit counts"); STATISTIC(NumBruteForceTripCountsComputed, "Number of loops with trip counts computed by force"); @@ -8450,23 +8450,6 @@ // must be cleared in this scope. BackedgeTakenInfo Result = computeBackedgeTakenCount(L); - // In product build, there are no usage of statistic. - (void)NumTripCountsComputed; - (void)NumTripCountsNotComputed; -#if LLVM_ENABLE_STATS || !defined(NDEBUG) - const SCEV *BEExact = Result.getExact(L, this); - if (BEExact != getCouldNotCompute()) { - assert(isLoopInvariant(BEExact, L) && - isLoopInvariant(Result.getConstantMax(this), L) && - "Computed backedge-taken count isn't loop invariant for loop!"); - ++NumTripCountsComputed; - } else if (Result.getConstantMax(this) == getCouldNotCompute() && - isa(L->getHeader()->begin())) { - // Only count loops that have phi nodes as not being computable. - ++NumTripCountsNotComputed; - } -#endif // LLVM_ENABLE_STATS || !defined(NDEBUG) - // Now that we know more about the trip count for this loop, forget any // existing SCEV values for PHI nodes in this loop since they are only // conservative estimates made without the benefit of trip count @@ -8852,7 +8835,9 @@ // 1. 
For each exit that can be computed, add an entry to ExitCounts. // CouldComputeBECount is true only if all exits can be computed. - if (EL.ExactNotTaken == getCouldNotCompute()) + if (EL.ExactNotTaken != getCouldNotCompute()) + ++NumExitCountsComputed; + else // We couldn't compute an exact value for this exit, so // we won't be able to compute an exact value for the loop. CouldComputeBECount = false; @@ -8860,9 +8845,11 @@ // Exact always implies symbolic, only check symbolic. if (EL.SymbolicMaxNotTaken != getCouldNotCompute()) ExitCounts.emplace_back(ExitBB, EL); - else + else { assert(EL.ExactNotTaken == getCouldNotCompute() && "Exact is known but symbolic isn't?"); + ++NumExitCountsNotComputed; + } // 2. Derive the loop's MaxBECount from each exit's max number of // non-exiting iterations. Partition the loop exits into two kinds: diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -230,10 +230,11 @@ InstructionCost TargetTransformInfo::getPointersChainCost( ArrayRef Ptrs, const Value *Base, - const TTI::PointersChainInfo &Info, TTI::TargetCostKind CostKind) const { + const TTI::PointersChainInfo &Info, Type *AccessTy, + TTI::TargetCostKind CostKind) const { assert((Base || !Info.isSameBase()) && "If pointers have same base address it has to be provided."); - return TTIImpl->getPointersChainCost(Ptrs, Base, Info, CostKind); + return TTIImpl->getPointersChainCost(Ptrs, Base, Info, AccessTy, CostKind); } unsigned TargetTransformInfo::getEstimatedNumberOfCaseClusters( diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -3817,18 +3817,12 @@ switch (IID) { default: break; - // sqrt(-0.0) = -0.0, no other negative results are possible. 
case Intrinsic::sqrt: + case Intrinsic::experimental_constrained_sqrt: + // sqrt(-0.0) = -0.0, no other negative results are possible. + // FIXME: Account for denormal-fp-math=preserve-sign denormal inputs case Intrinsic::canonicalize: return CannotBeNegativeZero(Call->getArgOperand(0), TLI, Depth + 1); - case Intrinsic::experimental_constrained_sqrt: { - // NOTE: This rounding mode restriction may be too strict. - const auto *CI = cast(Call); - if (CI->getRoundingMode() == RoundingMode::NearestTiesToEven) - return CannotBeNegativeZero(Call->getArgOperand(0), TLI, Depth + 1); - else - return false; - } // fabs(x) != -0.0 case Intrinsic::fabs: return true; diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp --- a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp @@ -70,13 +70,16 @@ } void fixPLTEdge(Edge &E, Symbol &PLTStubs) { - assert(E.getKind() == R_RISCV_CALL_PLT && "Not a R_RISCV_CALL_PLT edge?"); + assert((E.getKind() == R_RISCV_CALL || E.getKind() == R_RISCV_CALL_PLT || + E.getKind() == CallRelaxable) && + "Not a PLT edge?"); E.setKind(R_RISCV_CALL); E.setTarget(PLTStubs); } bool isExternalBranchEdge(Edge &E) const { - return (E.getKind() == R_RISCV_CALL || E.getKind() == R_RISCV_CALL_PLT) && + return (E.getKind() == R_RISCV_CALL || E.getKind() == R_RISCV_CALL_PLT || + E.getKind() == CallRelaxable) && !E.getTarget().isDefined(); } diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -479,7 +479,9 @@ assert(isInt<16>(BranchImm)); - *TargetPtr &= 0xfff8001fU; + uint32_t RawInstr = *(support::little32_t *)TargetPtr; + *(support::little32_t *)TargetPtr = RawInstr & 0xfff8001fU; + // Immediate:15:2 goes in bits 18:5 of TBZ, TBNZ or32le(TargetPtr, (BranchImm & 
0x0000FFFC) << 3); break; diff --git a/llvm/lib/MCA/CustomBehaviour.cpp b/llvm/lib/MCA/CustomBehaviour.cpp --- a/llvm/lib/MCA/CustomBehaviour.cpp +++ b/llvm/lib/MCA/CustomBehaviour.cpp @@ -42,14 +42,14 @@ return std::vector>(); } -SharedInstrument InstrumentManager::createInstrument(llvm::StringRef Desc, +UniqueInstrument InstrumentManager::createInstrument(llvm::StringRef Desc, llvm::StringRef Data) { - return std::make_shared(Desc, Data); + return std::make_unique(Desc, Data); } unsigned InstrumentManager::getSchedClassID( const MCInstrInfo &MCII, const MCInst &MCI, - const llvm::SmallVector &IVec) const { + const llvm::SmallVector &IVec) const { return MCII.get(MCI.getOpcode()).getSchedClass(); } diff --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp --- a/llvm/lib/MCA/InstrBuilder.cpp +++ b/llvm/lib/MCA/InstrBuilder.cpp @@ -511,7 +511,7 @@ Expected InstrBuilder::createInstrDescImpl(const MCInst &MCI, - const SmallVector &IVec) { + const SmallVector &IVec) { assert(STI.getSchedModel().hasInstrSchedModel() && "Itineraries are not yet supported!"); @@ -601,7 +601,7 @@ Expected InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI, - const SmallVector &IVec) { + const SmallVector &IVec) { // Cache lookup using SchedClassID from Instrumentation unsigned SchedClassID = IM.getSchedClassID(MCII, MCI, IVec); @@ -622,7 +622,7 @@ Expected> InstrBuilder::createInstruction(const MCInst &MCI, - const SmallVector &IVec) { + const SmallVector &IVec) { Expected DescOrErr = getOrCreateInstrDesc(MCI, IVec); if (!DescOrErr) return DescOrErr.takeError(); diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -370,8 +370,12 @@ void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale, unsigned Opc_rr, unsigned Opc_ri, bool IsIntr = false); + void SelectContiguousMultiVectorLoad(SDNode *N, 
unsigned NumVecs, + unsigned Scale, unsigned Opc_rr, + unsigned Opc_ri); void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs, - bool IsZmMulti, unsigned Opcode); + bool IsZmMulti, unsigned Opcode, + bool HasPred = false); void SelectPExtPair(SDNode *N, unsigned Opc); void SelectWhilePair(SDNode *N, unsigned Opc); void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode); @@ -1709,11 +1713,13 @@ void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs, bool IsZmMulti, - unsigned Opcode) { + unsigned Opcode, + bool HasPred) { assert(Opcode != 0 && "Unexpected opcode"); SDLoc DL(N); EVT VT = N->getValueType(0); + unsigned FirstVecIdx = HasPred ? 2 : 1; auto GetMultiVecOperand = [=](unsigned StartIdx) { SmallVector Regs(N->op_begin() + StartIdx, @@ -1721,16 +1727,20 @@ return createZMulTuple(Regs); }; - SDValue Zdn = GetMultiVecOperand(1); + SDValue Zdn = GetMultiVecOperand(FirstVecIdx); SDValue Zm; if (IsZmMulti) - Zm = GetMultiVecOperand(NumVecs + 1); + Zm = GetMultiVecOperand(NumVecs + FirstVecIdx); else - Zm = N->getOperand(NumVecs + 1); - - SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm); + Zm = N->getOperand(NumVecs + FirstVecIdx); + SDNode *Intrinsic; + if (HasPred) + Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, + N->getOperand(1), Zdn, Zm); + else + Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm); SDValue SuperReg = SDValue(Intrinsic, 0); for (unsigned i = 0; i < NumVecs; ++i) ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( @@ -1772,6 +1782,39 @@ CurDAG->RemoveDeadNode(N); } +void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N, + unsigned NumVecs, + unsigned Scale, + unsigned Opc_ri, + unsigned Opc_rr) { + assert(Scale < 4 && "Invalid scaling value."); + SDLoc DL(N); + EVT VT = N->getValueType(0); + SDValue Chain = N->getOperand(0); + + // Use simplest addressing mode for now - base + 0 offset + SDValue PNg = 
N->getOperand(2); + SDValue Base = N->getOperand(3); + SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64); + + SDValue Ops[] = {PNg, // Predicate-as-counter + Base, // Memory operand + Offset, Chain}; + + const EVT ResTys[] = {MVT::Untyped, MVT::Other}; + + SDNode *Load = CurDAG->getMachineNode(Opc_ri, DL, ResTys, Ops); + SDValue SuperReg = SDValue(Load, 0); + for (unsigned i = 0; i < NumVecs; ++i) + ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( + AArch64::zsub0 + i, DL, VT, SuperReg)); + + // Copy chain + unsigned ChainIdx = NumVecs; + ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1)); + CurDAG->RemoveDeadNode(N); +} + void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode) { if (N->getValueType(0) != MVT::nxv4f32) @@ -4648,6 +4691,74 @@ } break; } + case Intrinsic::aarch64_sve_ld1_pn_x2: { + if (VT == MVT::nxv16i8) { + SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM, AArch64::LD1B_2Z); + return; + } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || + VT == MVT::nxv8bf16) { + SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM, AArch64::LD1H_2Z); + return; + } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { + SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM, AArch64::LD1W_2Z); + return; + } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { + SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM, AArch64::LD1D_2Z); + return; + } + break; + } + case Intrinsic::aarch64_sve_ld1_pn_x4: { + if (VT == MVT::nxv16i8) { + SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM, AArch64::LD1B_4Z); + return; + } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || + VT == MVT::nxv8bf16) { + SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM, AArch64::LD1H_4Z); + return; + } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { + SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM, AArch64::LD1W_4Z); + return; + } else 
if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { + SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM, AArch64::LD1D_4Z); + return; + } + break; + } + case Intrinsic::aarch64_sve_ldnt1_pn_x2: { + if (VT == MVT::nxv16i8) { + SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM, AArch64::LDNT1B_2Z); + return; + } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || + VT == MVT::nxv8bf16) { + SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM, AArch64::LDNT1H_2Z); + return; + } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { + SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM, AArch64::LDNT1W_2Z); + return; + } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { + SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM, AArch64::LDNT1D_2Z); + return; + } + break; + } + case Intrinsic::aarch64_sve_ldnt1_pn_x4: { + if (VT == MVT::nxv16i8) { + SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM, AArch64::LDNT1B_4Z); + return; + } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || + VT == MVT::nxv8bf16) { + SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM, AArch64::LDNT1H_4Z); + return; + } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { + SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM, AArch64::LDNT1W_4Z); + return; + } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { + SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM, AArch64::LDNT1D_4Z); + return; + } + break; + } case Intrinsic::aarch64_sve_ld3_sret: { if (VT == MVT::nxv16i8) { SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B, @@ -5330,6 +5441,20 @@ SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, AArch64::UZP_VG4_4Z4Z_Q); return; + case Intrinsic::aarch64_sve_sel_x2: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H, + AArch64::SEL_VG2_2ZC2Z2Z_S, 
AArch64::SEL_VG2_2ZC2Z2Z_D})) + SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true); + return; + case Intrinsic::aarch64_sve_sel_x4: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H, + AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D})) + SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true); + return; case Intrinsic::aarch64_sve_frinta_x2: SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S); return; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -5042,8 +5042,12 @@ case Intrinsic::aarch64_sve_dupq_lane: return LowerDUPQLane(Op, DAG); case Intrinsic::aarch64_sve_convert_from_svbool: + if (Op.getValueType() == MVT::aarch64svcount) + return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Op.getOperand(1)); return getSVEPredicateBitCast(Op.getValueType(), Op.getOperand(1), DAG); case Intrinsic::aarch64_sve_convert_to_svbool: + if (Op.getOperand(1).getValueType() == MVT::aarch64svcount) + return DAG.getNode(ISD::BITCAST, dl, MVT::nxv16i1, Op.getOperand(1)); return getSVEPredicateBitCast(MVT::nxv16i1, Op.getOperand(1), DAG); case Intrinsic::aarch64_sve_fneg: return DAG.getNode(AArch64ISD::FNEG_MERGE_PASSTHRU, dl, Op.getValueType(), diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -632,8 +632,8 @@ defm UQRSHRN_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"uqrshrn", 0b101, int_aarch64_sve_uqrshrn_x4>; defm SQRSHRUN_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"sqrshrun", 0b110, int_aarch64_sve_sqrshrun_x4>; -defm SEL_VG2_2ZP2Z2Z: sme2_sel_vector_vg2<"sel">; -defm SEL_VG4_4ZP4Z4Z: sme2_sel_vector_vg4<"sel">; +defm SEL_VG2_2ZC2Z2Z: 
sme2_sel_vector_vg2<"sel">; +defm SEL_VG4_4ZC4Z4Z: sme2_sel_vector_vg4<"sel">; def LD1B_VG2_M2ZPXX : sme2_ld_vector_vg2_multi_scalar_scalar<0b00, 0b0, ZZ_b_strided, GPR64shifted8, "ld1b">; def LD1B_VG4_M4ZPXX : sme2_ld_vector_vg4_multi_scalar_scalar<0b00, 0b0, ZZZZ_b_strided, GPR64shifted8, "ld1b">; diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -3837,6 +3837,59 @@ defm STNT1W_4Z_IMM : sve2p1_mem_cst_si_4z<"stnt1w", 0b10, 0b1, ZZZZ_s_mul_r>; defm STNT1D_4Z_IMM : sve2p1_mem_cst_si_4z<"stnt1d", 0b11, 0b1, ZZZZ_d_mul_r>; +multiclass store_pn_x2 { + def : Pat<(Store (Ty ZPR:$vec0), (Ty ZPR:$vec1), + (aarch64svcount PPR:$PNg), GPR64:$base), + (RegImmInst (REG_SEQUENCE ZPR2Mul2, Ty:$vec0, zsub0, Ty:$vec1, zsub1), + PPR:$PNg, GPR64:$base, (i64 0))>; +} + +// Stores of 2 consecutive vectors +defm : store_pn_x2; +defm : store_pn_x2; +defm : store_pn_x2; +defm : store_pn_x2; +defm : store_pn_x2; +defm : store_pn_x2; +defm : store_pn_x2; +defm : store_pn_x2; +defm : store_pn_x2; +defm : store_pn_x2; +defm : store_pn_x2; +defm : store_pn_x2; +defm : store_pn_x2; +defm : store_pn_x2; +defm : store_pn_x2; +defm : store_pn_x2; + +multiclass store_pn_x4 { + def : Pat<(Store (Ty ZPR:$vec0), (Ty ZPR:$vec1), (Ty ZPR:$vec2), (Ty ZPR:$vec3), + (aarch64svcount PPR:$PNg), GPR64:$base), + (RegImmInst (REG_SEQUENCE ZPR4Mul4, Ty:$vec0, zsub0, Ty:$vec1, zsub1, + Ty:$vec2, zsub2, Ty:$vec3, zsub3), + PPR:$PNg, GPR64:$base, (i64 0))>; +} + +// Stores of 4 consecutive vectors +defm : store_pn_x4; +defm : store_pn_x4; +defm : store_pn_x4; +defm : store_pn_x4; +defm : store_pn_x4; +defm : store_pn_x4; +defm : store_pn_x4; +defm : store_pn_x4; +defm : store_pn_x4; +defm : store_pn_x4; +defm : store_pn_x4; +defm : store_pn_x4; +defm : store_pn_x4; +defm : store_pn_x4; +defm : store_pn_x4; +defm : store_pn_x4; + defm WHILEGE_2PXX : 
sve2p1_int_while_rr_pair<"whilege", 0b000>; defm WHILEGT_2PXX : sve2p1_int_while_rr_pair<"whilegt", 0b001>; defm WHILELT_2PXX : sve2p1_int_while_rr_pair<"whilelt", 0b010>; diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -737,6 +737,11 @@ if (auto BinOpCombine = tryCombineFromSVBoolBinOp(IC, II)) return BinOpCombine; + // Ignore converts to/from svcount_t. + if (isa(II.getArgOperand(0)->getType()) || + isa(II.getType())) + return std::nullopt; + SmallVector CandidatesForRemoval; Value *Cursor = II.getOperand(0), *EarliestReplacement = nullptr; diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -1309,30 +1309,30 @@ (!cast(NAME # _D) PNRAny:$Pd, PNRAny:$Pn, PPR64:$Pm, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_1:$imm), 0>; - def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPRAny:$Pm), + def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPR8:$Pm), MatrixIndexGPR32Op12_15:$idx)), (!cast(NAME # _B) $Pn, $Pm, $idx, 0)>; - def : Pat<(nxv8i1 (op (nxv8i1 PPRAny:$Pn), (nxv8i1 PPRAny:$Pm), + def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv8i1 PPR16:$Pm), MatrixIndexGPR32Op12_15:$idx)), (!cast(NAME # _H) $Pn, $Pm, $idx, 0)>; - def : Pat<(nxv4i1 (op (nxv4i1 PPRAny:$Pn), (nxv4i1 PPRAny:$Pm), + def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv4i1 PPR32:$Pm), MatrixIndexGPR32Op12_15:$idx)), (!cast(NAME # _S) $Pn, $Pm, $idx, 0)>; - def : Pat<(nxv2i1 (op (nxv2i1 PPRAny:$Pn), (nxv2i1 PPRAny:$Pm), + def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv2i1 PPR64:$Pm), MatrixIndexGPR32Op12_15:$idx)), (!cast(NAME # _D) $Pn, $Pm, $idx, 0)>; let AddedComplexity = 1 in { - def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPRAny:$Pm), 
+ def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPR8:$Pm), (i32 (tileslice8 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_15:$imm)))), (!cast(NAME # _B) $Pn, $Pm, $idx, $imm)>; - def : Pat<(nxv8i1 (op (nxv8i1 PPRAny:$Pn), (nxv8i1 PPRAny:$Pm), + def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv8i1 PPR16:$Pm), (i32 (tileslice16 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_7:$imm)))), (!cast(NAME # _H) $Pn, $Pm, $idx, $imm)>; - def : Pat<(nxv4i1 (op (nxv4i1 PPRAny:$Pn), (nxv4i1 PPRAny:$Pm), + def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv4i1 PPR32:$Pm), (i32 (tileslice32 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_3:$imm)))), (!cast(NAME # _S) $Pn, $Pm, $idx, $imm)>; - def : Pat<(nxv2i1 (op (nxv2i1 PPRAny:$Pn), (nxv2i1 PPRAny:$Pm), + def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv2i1 PPR64:$Pm), (i32 (tileslice64 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_1:$imm)))), (!cast(NAME # _D) $Pn, $Pm, $idx, $imm)>; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -1556,6 +1556,73 @@ return BreakPhiNodesCache[&I] = true; } +/// Helper class for "break large PHIs" (visitPHINode). +/// +/// This represents a slice of a PHI's incoming value, which is made up of: +/// - The type of the slice (Ty) +/// - The index in the incoming value's vector where the slice starts (Idx) +/// - The number of elements in the slice (NumElts). +/// It also keeps track of the NewPHI node inserted for this particular slice. +/// +/// Slice examples: +/// <4 x i64> -> Split into four i64 slices. +/// -> [i64, 0, 1], [i64, 1, 1], [i64, 2, 1], [i64, 3, 1] +/// <5 x i16> -> Split into 2 <2 x i16> slices + a i16 tail. 
+/// -> [<2 x i16>, 0, 2], [<2 x i16>, 2, 2], [i16, 4, 1] +class VectorSlice { +public: + VectorSlice(Type *Ty, unsigned Idx, unsigned NumElts) + : Ty(Ty), Idx(Idx), NumElts(NumElts) {} + + Type *Ty = nullptr; + unsigned Idx = 0; + unsigned NumElts = 0; + PHINode *NewPHI = nullptr; + + /// Slice \p Inc according to the information contained within this slice. + /// This is cached, so if called multiple times for the same \p BB & \p Inc + /// pair, it returns the same Sliced value as well. + /// + /// Note this *intentionally* does not return the same value for, say, + /// [%bb.0, %0] & [%bb.1, %0] as: + /// - It could cause issues with dominance (e.g. if bb.1 is seen first, then + /// the value in bb.1 may not be reachable from bb.0 if it's its + /// predecessor.) + /// - We also want to make our extract instructions as local as possible so + /// the DAG has better chances of folding them out. Duplicating them like + /// that is beneficial in that regard. + /// + /// This is both a minor optimization to avoid creating duplicate + /// instructions, but also a requirement for correctness. It is not forbidden + /// for a PHI node to have the same [BB, Val] pair multiple times. If we + /// returned a new value each time, those previously identical pairs would all + /// have different incoming values (from the same block) and it'd cause a "PHI + /// node has multiple entries for the same basic block with different incoming + /// values!" verifier error. 
+ Value *getSlicedVal(BasicBlock *BB, Value *Inc, StringRef NewValName) { + Value *&Res = SlicedVals[{BB, Inc}]; + if (Res) + return Res; + + IRBuilder<> B(BB->getTerminator()); + if (Instruction *IncInst = dyn_cast(Inc)) + B.SetCurrentDebugLocation(IncInst->getDebugLoc()); + + if (NumElts > 1) { + SmallVector Mask; + for (unsigned K = Idx; K < (Idx + NumElts); ++K) + Mask.push_back(K); + Res = B.CreateShuffleVector(Inc, Mask, NewValName); + } else + Res = B.CreateExtractElement(Inc, Idx, NewValName); + + return Res; + } + +private: + SmallDenseMap, Value *> SlicedVals; +}; + bool AMDGPUCodeGenPrepare::visitPHINode(PHINode &I) { // Break-up fixed-vector PHIs into smaller pieces. // Default threshold is 32, so it breaks up any vector that's >32 bits into @@ -1577,14 +1644,6 @@ if (!ForceScalarizeLargePHIs && !canBreakPHINode(I)) return false; - struct VectorSlice { - Type *Ty = nullptr; - unsigned Idx = 0; - unsigned NumElts = 0; - std::vector IncomingValues = {}; - PHINode *NewPHI = nullptr; - }; - std::vector Slices; Type *EltTy = FVT->getElementType(); @@ -1599,47 +1658,36 @@ Type *SubVecTy = FixedVectorType::get(EltTy, SubVecSize); for (unsigned End = alignDown(NumElts, SubVecSize); Idx < End; Idx += SubVecSize) - Slices.push_back(VectorSlice{SubVecTy, Idx, SubVecSize}); + Slices.emplace_back(SubVecTy, Idx, SubVecSize); } // Scalarize all remaining elements. for (; Idx < NumElts; ++Idx) - Slices.push_back(VectorSlice{EltTy, Idx, 1}); + Slices.emplace_back(EltTy, Idx, 1); } if (Slices.size() == 1) return false; - // Break up this PHI's incoming values. 
- for (unsigned Idx = 0; Idx < I.getNumIncomingValues(); ++Idx) { - Value *Inc = I.getIncomingValue(Idx); - - IRBuilder<> B(I.getIncomingBlock(Idx)->getTerminator()); - if (Instruction *IncInst = dyn_cast(Inc)) - B.SetCurrentDebugLocation(IncInst->getDebugLoc()); - - unsigned NameSuffix = 0; - for (VectorSlice &S : Slices) { - const auto ValName = - "largephi.extractslice" + std::to_string(NameSuffix++); - if (S.NumElts > 1) { - SmallVector Mask; - for (unsigned K = S.Idx; K < (S.Idx + S.NumElts); ++K) - Mask.push_back(K); - S.IncomingValues.push_back(B.CreateShuffleVector(Inc, Mask, ValName)); - } else - S.IncomingValues.push_back(B.CreateExtractElement(Inc, S.Idx, ValName)); - } - } - - // Now create one PHI per vector piece. - IRBuilder<> B(I.getParent()->getFirstNonPHI()); + // Create one PHI per vector piece. The "VectorSlice" class takes care of + // creating the necessary instruction to extract the relevant slices of each + // incoming value. + IRBuilder<> B(I.getParent()); B.SetCurrentDebugLocation(I.getDebugLoc()); + unsigned IncNameSuffix = 0; for (VectorSlice &S : Slices) { + // We need to reset the build on each iteration, because getSlicedVal may + // have inserted something into I's BB. + B.SetInsertPoint(I.getParent()->getFirstNonPHI()); S.NewPHI = B.CreatePHI(S.Ty, I.getNumIncomingValues()); - for (const auto &[Idx, BB] : enumerate(I.blocks())) - S.NewPHI->addIncoming(S.IncomingValues[Idx], BB); + + for (const auto &[Idx, BB] : enumerate(I.blocks())) { + S.NewPHI->addIncoming(S.getSlicedVal(BB, I.getIncomingValue(Idx), + "largephi.extractslice" + + std::to_string(IncNameSuffix++)), + BB); + } } // And replace this PHI with a vector of all the previous PHI values. 
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -4678,7 +4678,10 @@ SelectionDAG &DAG) const { unsigned Opc = Op.getOpcode(); EVT VT = Op.getValueType(); - assert(VT == MVT::v4f16 || VT == MVT::v4i16); + assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4f32 || + VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i16 || + VT == MVT::v16f16 || VT == MVT::v8f32 || VT == MVT::v16f32 || + VT == MVT::v32f32); SDValue Lo, Hi; std::tie(Lo, Hi) = DAG.SplitVectorOperand(Op.getNode(), 0); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -634,11 +634,11 @@ assert(MBB.getParent()->getRegInfo().isReserved(Tmp) && "VGPR used for an intermediate copy should have been reserved."); - // Only loop through if there are any free registers left, otherwise - // scavenger may report a fatal error without emergency spill slot - // or spill with the slot. - while (RegNo-- && RS.FindUnusedReg(&AMDGPU::VGPR_32RegClass)) { - Register Tmp2 = RS.scavengeRegister(&AMDGPU::VGPR_32RegClass, 0); + // Only loop through if there are any free registers left. We don't want to + // spill. + while (RegNo--) { + Register Tmp2 = RS.scavengeRegister(&AMDGPU::VGPR_32RegClass, 0, + /* AllowSpill */ false); if (!Tmp2 || RI.getHWRegIndex(Tmp2) >= MaxVGPRs) break; Tmp = Tmp2; @@ -7919,9 +7919,10 @@ return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e32), DestReg); // If available, prefer to use vcc. - Register UnusedCarry = !RS.isRegUsed(AMDGPU::VCC) - ? Register(RI.getVCC()) - : RS.scavengeRegister(RI.getBoolRC(), I, 0, false); + Register UnusedCarry = + !RS.isRegUsed(AMDGPU::VCC) + ? Register(RI.getVCC()) + : RS.scavengeRegister(RI.getBoolRC(), I, 0, /* AllowSpill */ false); // TODO: Users need to deal with this. 
if (!UnusedCarry.isValid()) diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp --- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -1539,7 +1539,11 @@ assert(MI->getNumExplicitOperands() == 2); } - MI->setDesc(TII->get(AMDGPU::COPY)); + unsigned CopyOp = MI->getOperand(1).isReg() + ? AMDGPU::COPY + : TII->getMovOpcode(TRI->getRegClassForOperandReg( + *MRI, MI->getOperand(0))); + MI->setDesc(TII->get(CopyOp)); } } diff --git a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.h b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.h --- a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.h +++ b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.h @@ -47,13 +47,13 @@ bool supportsInstrumentType(StringRef Type) const override; /// Create a Instrument for RISC-V target - SharedInstrument createInstrument(StringRef Desc, StringRef Data) override; + UniqueInstrument createInstrument(StringRef Desc, StringRef Data) override; /// Using the Instrument, returns a SchedClassID to use instead of /// the SchedClassID that belongs to the MCI or the original SchedClassID. 
unsigned getSchedClassID(const MCInstrInfo &MCII, const MCInst &MCI, - const SmallVector &IVec) const override; + const SmallVector &IVec) const override; }; } // namespace mca diff --git a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp --- a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp +++ b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp @@ -73,7 +73,7 @@ return Type == RISCVLMULInstrument::DESC_NAME; } -SharedInstrument +UniqueInstrument RISCVInstrumentManager::createInstrument(llvm::StringRef Desc, llvm::StringRef Data) { if (Desc != RISCVLMULInstrument::DESC_NAME) { @@ -86,19 +86,19 @@ << Data << '\n'); return nullptr; } - return std::make_shared(Data); + return std::make_unique(Data); } unsigned RISCVInstrumentManager::getSchedClassID( const MCInstrInfo &MCII, const MCInst &MCI, - const llvm::SmallVector &IVec) const { + const llvm::SmallVector &IVec) const { unsigned short Opcode = MCI.getOpcode(); unsigned SchedClassID = MCII.get(Opcode).getSchedClass(); for (const auto &I : IVec) { // Unknown Instrument kind if (I->getDesc() == RISCVLMULInstrument::DESC_NAME) { - uint8_t LMUL = static_cast(I.get())->getLMUL(); + uint8_t LMUL = static_cast(I)->getLMUL(); const RISCVVInversePseudosTable::PseudoInfo *RVV = RISCVVInversePseudosTable::getBaseInfo(Opcode, LMUL); // Not a RVV instr diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td @@ -342,9 +342,7 @@ defvar vbool64_t = nxv1i1; // There is no need to define register classes for fractional LMUL. -def LMULList { - list m = [1, 2, 4, 8]; -} +defvar LMULList = [1, 2, 4, 8]; //===----------------------------------------------------------------------===// // Utility classes for segment load/store. 
@@ -576,7 +574,7 @@ (add (sequence "V%u", 8, 31), (sequence "V%u", 0, 7)), 1>; -foreach m = LMULList.m in { +foreach m = LMULList in { foreach nf = NFList.L in { def "VRN" # nf # "M" # m # "NoV0": VReg<[untyped], (add !cast("VN" # nf # "M" # m # "NoV0")), diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -106,6 +106,12 @@ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind); + InstructionCost getPointersChainCost(ArrayRef Ptrs, + const Value *Base, + const TTI::PointersChainInfo &Info, + Type *AccessTy, + TTI::TargetCostKind CostKind); + void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE); diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -1592,6 +1592,55 @@ } } +// TODO: Deduplicate from TargetTransformInfoImplCRTPBase. +InstructionCost RISCVTTIImpl::getPointersChainCost( + ArrayRef Ptrs, const Value *Base, + const TTI::PointersChainInfo &Info, Type *AccessTy, + TTI::TargetCostKind CostKind) { + InstructionCost Cost = TTI::TCC_Free; + // In the basic model we take into account GEP instructions only + // (although here can come alloca instruction, a value, constants and/or + // constant expressions, PHIs, bitcasts ... whatever allowed to be used as a + // pointer). Typically, if Base is a not a GEP-instruction and all the + // pointers are relative to the same base address, all the rest are + // either GEP instructions, PHIs, bitcasts or constants. When we have same + // base, we just calculate cost of each non-Base GEP as an ADD operation if + // any their index is a non-const. 
+ // If no known dependecies between the pointers cost is calculated as a sum + // of costs of GEP instructions. + for (auto [I, V] : enumerate(Ptrs)) { + const auto *GEP = dyn_cast(V); + if (!GEP) + continue; + if (Info.isSameBase() && V != Base) { + if (GEP->hasAllConstantIndices()) + continue; + // If the chain is unit-stride and BaseReg + stride*i is a legal + // addressing mode, then presume the base GEP is sitting around in a + // register somewhere and check if we can fold the offset relative to + // it. + unsigned Stride = DL.getTypeStoreSize(AccessTy); + if (Info.isUnitStride() && + isLegalAddressingMode(AccessTy, + /* BaseGV */ nullptr, + /* BaseOffset */ Stride * I, + /* HasBaseReg */ true, + /* Scale */ 0, + GEP->getType()->getPointerAddressSpace())) + continue; + Cost += getArithmeticInstrCost(Instruction::Add, GEP->getType(), CostKind, + {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_AnyValue, TTI::OP_None}, + std::nullopt); + } else { + SmallVector Indices(GEP->indices()); + Cost += getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(), + Indices, CostKind); + } + } + return Cost; +} + void RISCVTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) { diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -529,7 +529,7 @@ } if (Subtarget.hasSSEPrefetch() || Subtarget.hasThreeDNow()) - setOperationAction(ISD::PREFETCH , MVT::Other, Legal); + setOperationAction(ISD::PREFETCH , MVT::Other, Custom); setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom); @@ -33984,6 +33984,18 @@ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi); } +static SDValue LowerPREFETCH(SDValue Op, const X86Subtarget &Subtarget, + SelectionDAG &DAG) { + unsigned IsData = cast(Op.getOperand(4))->getZExtValue(); + + // We don't support non-data prefetch without 
PREFETCHI. + // Just preserve the chain. + if (!IsData && !Subtarget.hasPREFETCHI()) + return Op.getOperand(0); + + return Op; +} + static StringRef getInstrStrFromOpNo(const SmallVectorImpl &AsmStrs, unsigned OpNo) { const APInt Operand(32, OpNo); @@ -34188,6 +34200,7 @@ case ISD::GC_TRANSITION_END: return LowerGC_TRANSITION(Op, DAG); case ISD::ADDRSPACECAST: return LowerADDRSPACECAST(Op, DAG); case X86ISD::CVTPS2PH: return LowerCVTPS2PH(Op, DAG); + case ISD::PREFETCH: return LowerPREFETCH(Op, Subtarget, DAG); } } diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -180,6 +180,7 @@ InstructionCost getPointersChainCost(ArrayRef Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, + Type *AccessTy, TTI::TargetCostKind CostKind); InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr); diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -4943,9 +4943,11 @@ return Cost + LT.first; } -InstructionCost X86TTIImpl::getPointersChainCost( - ArrayRef Ptrs, const Value *Base, - const TTI::PointersChainInfo &Info, TTI::TargetCostKind CostKind) { +InstructionCost +X86TTIImpl::getPointersChainCost(ArrayRef Ptrs, + const Value *Base, + const TTI::PointersChainInfo &Info, + Type *AccessTy, TTI::TargetCostKind CostKind) { if (Info.isSameBase() && Info.isKnownStride()) { // If all the pointers have known stride all the differences are translated // into constants. 
X86 memory addressing allows encoding it into @@ -4957,7 +4959,7 @@ } return TTI::TCC_Free; } - return BaseT::getPointersChainCost(Ptrs, Base, Info, CostKind); + return BaseT::getPointersChainCost(Ptrs, Base, Info, AccessTy, CostKind); } InstructionCost X86TTIImpl::getAddressComputationCost(Type *Ty, diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -49,7 +49,6 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/InlineCost.h" -#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueLattice.h" #include "llvm/Analysis/ValueLatticeUtils.h" @@ -82,10 +81,6 @@ "Don't specialize functions that have less than this number of " "instructions")); -static cl::opt AvgLoopIters( - "funcspec-avg-loop-iters", cl::init(10), cl::Hidden, cl::desc( - "Average loop iteration count")); - static cl::opt SpecializeOnAddress( "funcspec-on-address", cl::init(false), cl::Hidden, cl::desc( "Enable function specialization on the address of global values")); @@ -502,8 +497,7 @@ // Calculate the specialisation gain. Cost Score = 0 - SpecCost; for (ArgInfo &A : S.Args) - Score += - getSpecializationBonus(A.Formal, A.Actual, Solver.getLoopInfo(*F)); + Score += getSpecializationBonus(A.Formal, A.Actual); // Discard unprofitable specialisations. if (!ForceSpecialization && Score <= 0) @@ -594,41 +588,42 @@ } static Cost getUserBonus(User *U, TargetTransformInfo &TTI, - const LoopInfo &LI) { + BlockFrequencyInfo &BFI) { auto *I = dyn_cast_or_null(U); // If not an instruction we do not know how to evaluate. // Keep minimum possible cost for now so that it doesnt affect // specialization. 
if (!I) - return std::numeric_limits::min(); + return 0; - Cost Bonus = - TTI.getInstructionCost(U, TargetTransformInfo::TCK_SizeAndLatency); + uint64_t Weight = BFI.getBlockFreq(I->getParent()).getFrequency() / + BFI.getEntryFreq(); + if (!Weight) + return 0; - // Increase the cost if it is inside the loop. - unsigned LoopDepth = LI.getLoopDepth(I->getParent()); - Bonus *= std::pow((double)AvgLoopIters, LoopDepth); + Cost Bonus = Weight * + TTI.getInstructionCost(U, TargetTransformInfo::TCK_SizeAndLatency); // Traverse recursively if there are more uses. // TODO: Any other instructions to be added here? if (I->mayReadFromMemory() || I->isCast()) for (auto *User : I->users()) - Bonus += getUserBonus(User, TTI, LI); + Bonus += getUserBonus(User, TTI, BFI); return Bonus; } /// Compute a bonus for replacing argument \p A with constant \p C. -Cost FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C, - const LoopInfo &LI) { +Cost FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C) { Function *F = A->getParent(); auto &TTI = (GetTTI)(*F); + auto &BFI = (GetBFI)(*F); LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: " << C->getNameOrAsOperand() << "\n"); Cost TotalCost = 0; for (auto *U : A->users()) { - TotalCost += getUserBonus(U, TTI, LI); + TotalCost += getUserBonus(U, TTI, BFI); LLVM_DEBUG(dbgs() << "FnSpecialization: User cost "; TotalCost.print(dbgs()); dbgs() << " for: " << *U << "\n"); } diff --git a/llvm/lib/Transforms/IPO/SCCP.cpp b/llvm/lib/Transforms/IPO/SCCP.cpp --- a/llvm/lib/Transforms/IPO/SCCP.cpp +++ b/llvm/lib/Transforms/IPO/SCCP.cpp @@ -13,7 +13,7 @@ #include "llvm/Transforms/IPO/SCCP.h" #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -107,13 +107,15 @@ 
static bool runIPSCCP( Module &M, const DataLayout &DL, FunctionAnalysisManager *FAM, + std::function GetBFI, std::function GetTLI, std::function GetTTI, std::function GetAC, function_ref getAnalysis, bool IsFuncSpecEnabled) { SCCPSolver Solver(DL, GetTLI, M.getContext()); - FunctionSpecializer Specializer(Solver, M, FAM, GetTLI, GetTTI, GetAC); + FunctionSpecializer Specializer(Solver, M, FAM, GetBFI, GetTLI, GetTTI, + GetAC); // Loop over all functions, marking arguments to those with their addresses // taken or that are external as overdefined. @@ -381,21 +383,23 @@ auto GetTLI = [&FAM](Function &F) -> const TargetLibraryInfo & { return FAM.getResult(F); }; + auto GetBFI = [&](Function &F) -> BlockFrequencyInfo & { + return FAM.getResult(F); + }; auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & { return FAM.getResult(F); }; auto GetAC = [&FAM](Function &F) -> AssumptionCache & { return FAM.getResult(F); }; - auto getAnalysis = [&FAM, this](Function &F) -> AnalysisResultsForFn { + auto getAnalysis = [&FAM](Function &F) -> AnalysisResultsForFn { DominatorTree &DT = FAM.getResult(F); return { std::make_unique(F, DT, FAM.getResult(F)), - &DT, FAM.getCachedResult(F), - isFuncSpecEnabled() ? 
&FAM.getResult(F) : nullptr }; + &DT, FAM.getCachedResult(F) }; }; - if (!runIPSCCP(M, DL, &FAM, GetTLI, GetTTI, GetAC, getAnalysis, + if (!runIPSCCP(M, DL, &FAM, GetBFI, GetTLI, GetTTI, GetAC, getAnalysis, isFuncSpecEnabled())) return PreservedAnalyses::all(); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -1611,6 +1611,17 @@ if (!OtherBr || BBI == OtherBB->begin()) return false; + auto OtherStoreIsMergeable = [&](StoreInst *OtherStore) -> bool { + if (!OtherStore || + OtherStore->getPointerOperand() != SI.getPointerOperand()) + return false; + + auto *SIVTy = SI.getValueOperand()->getType(); + auto *OSVTy = OtherStore->getValueOperand()->getType(); + return CastInst::isBitOrNoopPointerCastable(OSVTy, SIVTy, DL) && + SI.hasSameSpecialState(OtherStore); + }; + // If the other block ends in an unconditional branch, check for the 'if then // else' case. There is an instruction before the branch. StoreInst *OtherStore = nullptr; @@ -1626,8 +1637,7 @@ // If this isn't a store, isn't a store to the same location, or is not the // right kind of store, bail out. OtherStore = dyn_cast(BBI); - if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) || - !SI.isSameOperationAs(OtherStore)) + if (!OtherStoreIsMergeable(OtherStore)) return false; } else { // Otherwise, the other block ended with a conditional branch. If one of the @@ -1641,12 +1651,10 @@ // lives in OtherBB. for (;; --BBI) { // Check to see if we find the matching store. 
- if ((OtherStore = dyn_cast(BBI))) { - if (OtherStore->getOperand(1) != SI.getOperand(1) || - !SI.isSameOperationAs(OtherStore)) - return false; + OtherStore = dyn_cast(BBI); + if (OtherStoreIsMergeable(OtherStore)) break; - } + // If we find something that may be using or overwriting the stored // value, or if we run out of instructions, we can't do the transform. if (BBI->mayReadFromMemory() || BBI->mayThrow() || @@ -1664,14 +1672,17 @@ } // Insert a PHI node now if we need it. - Value *MergedVal = OtherStore->getOperand(0); + Value *MergedVal = OtherStore->getValueOperand(); // The debug locations of the original instructions might differ. Merge them. DebugLoc MergedLoc = DILocation::getMergedLocation(SI.getDebugLoc(), OtherStore->getDebugLoc()); - if (MergedVal != SI.getOperand(0)) { - PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge"); - PN->addIncoming(SI.getOperand(0), SI.getParent()); - PN->addIncoming(OtherStore->getOperand(0), OtherBB); + if (MergedVal != SI.getValueOperand()) { + PHINode *PN = + PHINode::Create(SI.getValueOperand()->getType(), 2, "storemerge"); + PN->addIncoming(SI.getValueOperand(), SI.getParent()); + Builder.SetInsertPoint(OtherStore); + PN->addIncoming(Builder.CreateBitOrPointerCast(MergedVal, PN->getType()), + OtherBB); MergedVal = InsertNewInstBefore(PN, DestBB->front()); PN->setDebugLoc(MergedLoc); } diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -106,6 +106,8 @@ "Number of min/max expressions hoisted out of the loop"); STATISTIC(NumGEPsHoisted, "Number of geps reassociated and hoisted out of the loop"); +STATISTIC(NumAddSubHoisted, "Number of add/subtract expressions reassociated " + "and hoisted out of the loop"); /// Memory promotion is enabled by default. static cl::opt @@ -2525,10 +2527,89 @@ return true; } +/// Try to turn things like "LV + C1 < C2" into "LV < C2 - C1". 
Here +/// C1 and C2 are loop invariants and LV is a loop-variant. +static bool hoistAdd(ICmpInst::Predicate Pred, Value *VariantLHS, + Value *InvariantRHS, ICmpInst &ICmp, Loop &L, + ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU, + AssumptionCache *AC, DominatorTree *DT) { + assert(ICmpInst::isSigned(Pred) && "Not supported yet!"); + assert(!L.isLoopInvariant(VariantLHS) && "Precondition."); + assert(L.isLoopInvariant(InvariantRHS) && "Precondition."); + + // Try to represent VariantLHS as sum of invariant and variant operands. + using namespace PatternMatch; + Value *VariantOp, *InvariantOp; + if (!match(VariantLHS, m_NSWAdd(m_Value(VariantOp), m_Value(InvariantOp)))) + return false; + + // LHS itself is a loop-variant, try to represent it in the form: + // "VariantOp + InvariantOp". If it is possible, then we can reassociate. + if (L.isLoopInvariant(VariantOp)) + std::swap(VariantOp, InvariantOp); + if (L.isLoopInvariant(VariantOp) || !L.isLoopInvariant(InvariantOp)) + return false; + + // In order to turn "LV + C1 < C2" into "LV < C2 - C1", we need to be able to + // freely move values from left side of inequality to right side (just as in + // normal linear arithmetics). Overflows make things much more complicated, so + // we want to avoid this. 
+ auto &DL = L.getHeader()->getModule()->getDataLayout(); + bool ProvedNoOverflowAfterReassociate = + computeOverflowForSignedSub(InvariantRHS, InvariantOp, DL, AC, &ICmp, + DT) == llvm::OverflowResult::NeverOverflows; + if (!ProvedNoOverflowAfterReassociate) + return false; + auto *Preheader = L.getLoopPreheader(); + assert(Preheader && "Loop is not in simplify form?"); + IRBuilder<> Builder(Preheader->getTerminator()); + Value *NewCmpOp = Builder.CreateSub(InvariantRHS, InvariantOp, "invariant.op", + /*HasNUW*/ false, /*HasNSW*/ true); + ICmp.setPredicate(Pred); + ICmp.setOperand(0, VariantOp); + ICmp.setOperand(1, NewCmpOp); + eraseInstruction(cast(*VariantLHS), SafetyInfo, MSSAU); + return true; +} + +/// Reassociate and hoist add/sub expressions. +static bool hoistAddSub(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo, + MemorySSAUpdater &MSSAU, AssumptionCache *AC, + DominatorTree *DT) { + using namespace PatternMatch; + ICmpInst::Predicate Pred; + Value *LHS, *RHS; + if (!match(&I, m_ICmp(Pred, m_Value(LHS), m_Value(RHS)))) + return false; + + // TODO: Support unsigned predicates? + if (!ICmpInst::isSigned(Pred)) + return false; + + // Put variant operand to LHS position. + if (L.isLoopInvariant(LHS)) { + std::swap(LHS, RHS); + Pred = ICmpInst::getSwappedPredicate(Pred); + } + // We want to delete the initial operation after reassociation, so only do it + // if it has no other uses. + if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS) || !LHS->hasOneUse()) + return false; + + // TODO: We could go with smarter context, taking common dominator of all I's + // users instead of I itself. + if (hoistAdd(Pred, LHS, RHS, cast(I), L, SafetyInfo, MSSAU, AC, DT)) + return true; + + // TODO: Support Sub. 
+ + return false; +} + static bool hoistArithmetics(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo, - MemorySSAUpdater &MSSAU, - AssumptionCache *AC, DominatorTree *DT) { + MemorySSAUpdater &MSSAU, AssumptionCache *AC, + DominatorTree *DT) { // Optimize complex patterns, such as (x < INV1 && x < INV2), turning them // into (x < min(INV1, INV2)), and hoisting the invariant part of this // expression out of the loop. @@ -2545,6 +2626,13 @@ return true; } + // Try to hoist add/sub's by reassociation. + if (hoistAddSub(I, L, SafetyInfo, MSSAU, AC, DT)) { + ++NumHoisted; + ++NumAddSubHoisted; + return true; + } + return false; } diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp --- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp +++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp @@ -664,13 +664,6 @@ return A->second.PredInfo->getPredicateInfoFor(I); } - const LoopInfo &getLoopInfo(Function &F) { - auto A = AnalysisResults.find(&F); - assert(A != AnalysisResults.end() && A->second.LI && - "Need LoopInfo analysis results for function."); - return *A->second.LI; - } - DomTreeUpdater getDTU(Function &F) { auto A = AnalysisResults.find(&F); assert(A != AnalysisResults.end() && "Need analysis results for function."); @@ -1962,10 +1955,6 @@ return Visitor->getPredicateInfoFor(I); } -const LoopInfo &SCCPSolver::getLoopInfo(Function &F) { - return Visitor->getLoopInfo(F); -} - DomTreeUpdater SCCPSolver::getDTU(Function &F) { return Visitor->getDTU(F); } void SCCPSolver::trackValueOfGlobalVariable(GlobalVariable *GV) { diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -7394,7 +7394,8 @@ // stay in vectorized code due to uses outside of these scalar // loads/stores. 
ScalarCost = TTI->getPointersChainCost( - Ptrs, BasePtr, TTI::PointersChainInfo::getUnitStride(), CostKind); + Ptrs, BasePtr, TTI::PointersChainInfo::getUnitStride(), ScalarTy, + CostKind); SmallVector PtrsRetainedInVecCode; for (Value *V : Ptrs) { @@ -7420,7 +7421,7 @@ } VecCost = TTI->getPointersChainCost( PtrsRetainedInVecCode, BasePtr, - TTI::PointersChainInfo::getKnownStride(), CostKind); + TTI::PointersChainInfo::getKnownStride(), VecTy, CostKind); } else { // Case 1: Ptrs are the arguments of loads that we are going to transform // into masked gather load intrinsic. @@ -7436,7 +7437,8 @@ ? TTI::PointersChainInfo::getUnknownStride() : TTI::PointersChainInfo::getKnownStride(); - ScalarCost = TTI->getPointersChainCost(Ptrs, BasePtr, PtrsInfo, CostKind); + ScalarCost = TTI->getPointersChainCost(Ptrs, BasePtr, PtrsInfo, ScalarTy, + CostKind); // Remark: it not quite correct to use scalar GEP cost for a vector GEP, // but it's not clear how to do that without having vector GEP arguments diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -686,6 +686,11 @@ } PHINode *getPhi() const { return Phi; } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the VPLiveOut to \p O. 
+ void print(raw_ostream &O, VPSlotTracker &SlotTracker) const; +#endif }; /// VPRecipeBase is a base class modeling a sequence of one or more output IR diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -791,11 +791,7 @@ if (!LiveOuts.empty()) O << "\n"; for (const auto &KV : LiveOuts) { - O << "Live-out "; - KV.second->getPhi()->printAsOperand(O); - O << " = "; - KV.second->getOperand(0)->printAsOperand(O, SlotTracker); - O << "\n"; + KV.second->print(O, SlotTracker); } O << "}\n"; diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -169,6 +169,16 @@ State.Builder.GetInsertBlock()); } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPLiveOut::print(raw_ostream &O, VPSlotTracker &SlotTracker) const { + O << "Live-out "; + getPhi()->printAsOperand(O); + O << " = "; + getOperand(0)->printAsOperand(O, SlotTracker); + O << "\n"; +} +#endif + void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) { assert(!Parent && "Recipe already in some VPBasicBlock"); assert(InsertPos->getParent() && diff --git a/llvm/test/Analysis/ScalarEvolution/pr62380.ll b/llvm/test/Analysis/ScalarEvolution/pr62380.ll --- a/llvm/test/Analysis/ScalarEvolution/pr62380.ll +++ b/llvm/test/Analysis/ScalarEvolution/pr62380.ll @@ -1,12 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -passes='loop(loop-deletion),loop-mssa(loop-predication,licm,simple-loop-unswitch),loop(loop-predication)' -S < %s | FileCheck %s -; REQUIRES: asserts -; XFAIL: * - target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2" target triple = "x86_64-unknown-linux-gnu" define void @test(i32 
%arg) { +; CHECK-LABEL: define void @test +; CHECK-SAME: (i32 [[ARG:%.*]]) { +; CHECK-NEXT: bb: +; CHECK-NEXT: br label [[BB1:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB2:%.*]] +; CHECK: bb2: +; CHECK-NEXT: br i1 false, label [[BB3_PREHEADER:%.*]], label [[BB1]] +; CHECK: bb3.preheader: +; CHECK-NEXT: [[LOAD_LE:%.*]] = load i32, ptr null, align 4 +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb3.loopexit: +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[ADD:%.*]], [[BB3_LOOPEXIT:%.*]] ], [ 0, [[BB3_PREHEADER]] ] +; CHECK-NEXT: [[ADD]] = add i32 [[PHI]], 1 +; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i32 [[PHI]], [[LOAD_LE]] +; CHECK-NEXT: br i1 [[ICMP]], label [[BB5:%.*]], label [[BB4:%.*]] +; CHECK: bb4: +; CHECK-NEXT: ret void +; CHECK: bb5: +; CHECK-NEXT: [[CALL:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: br i1 [[CALL]], label [[BB9_PREHEADER:%.*]], label [[BB14:%.*]] +; CHECK: bb9.preheader: +; CHECK-NEXT: br label [[BB9:%.*]] +; CHECK: bb6: +; CHECK-NEXT: [[ADD7:%.*]] = add i32 [[PHI10:%.*]], 1 +; CHECK-NEXT: [[ICMP8:%.*]] = icmp ugt i32 [[PHI10]], 1 +; CHECK-NEXT: br i1 [[ICMP8]], label [[BB3_LOOPEXIT]], label [[BB9]] +; CHECK: bb9: +; CHECK-NEXT: [[PHI10]] = phi i32 [ [[ADD7]], [[BB6:%.*]] ], [ [[PHI]], [[BB9_PREHEADER]] ] +; CHECK-NEXT: [[ICMP11:%.*]] = icmp ult i32 [[PHI10]], [[ARG]] +; CHECK-NEXT: [[CALL12:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[AND:%.*]] = and i1 [[ICMP11]], true +; CHECK-NEXT: br i1 [[AND]], label [[BB6]], label [[BB13:%.*]] +; CHECK: bb13: +; CHECK-NEXT: ret void +; CHECK: bb14: +; CHECK-NEXT: ret void +; bb: br label %bb1 diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll @@ -55,6 +55,14 @@ ret %res } +define 
@reinterpret_bool_from_svcount(target("aarch64.svcount") %pg) "target-features"="+sme2" { +; CHECK-LABEL: reinterpret_bool_from_svcount: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") %pg) + ret %out +} + ; ; Converting from svbool_t ; @@ -99,6 +107,15 @@ ret %out } +define target("aarch64.svcount") @reinterpret_bool_to_svcount( %pg) "target-features"="+sme2" { +; CHECK-LABEL: reinterpret_bool_to_svcount: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %out = call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt( %pg) + ret target("aarch64.svcount") %out +} + + ; Reinterpreting a ptrue should not introduce an `and` instruction. define @reinterpret_ptrue() { ; CHECK-LABEL: reinterpret_ptrue: @@ -142,9 +159,11 @@ declare @llvm.aarch64.sve.convert.to.svbool.nxv4i1() declare @llvm.aarch64.sve.convert.to.svbool.nxv2i1() declare @llvm.aarch64.sve.convert.to.svbool.nxv1i1() +declare @llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount")) declare @llvm.aarch64.sve.convert.from.svbool.nxv16i1() declare @llvm.aarch64.sve.convert.from.svbool.nxv8i1() declare @llvm.aarch64.sve.convert.from.svbool.nxv4i1() declare @llvm.aarch64.sve.convert.from.svbool.nxv2i1() declare @llvm.aarch64.sve.convert.from.svbool.nxv1i1() +declare target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt() diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-psel.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-psel.ll --- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-psel.ll +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-psel.ll @@ -22,70 +22,70 @@ ret %res } -define @psel_h( %p1, %p2, i32 %idx) { +define @psel_h( %p1, %p2, i32 %idx) { ; CHECK-LABEL: psel_h: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: psel p0, p0, p1.h[w12, 0] ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sve.psel.nxv8i1( %p1, %p2, i32 %idx) - ret %res + %res = 
call @llvm.aarch64.sve.psel.nxv8i1( %p1, %p2, i32 %idx) + ret %res } -define @psel_h_imm( %p1, %p2, i32 %idx) { +define @psel_h_imm( %p1, %p2, i32 %idx) { ; CHECK-LABEL: psel_h_imm: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: psel p0, p0, p1.h[w12, 7] ; CHECK-NEXT: ret %add = add i32 %idx, 7 - %res = call @llvm.aarch64.sve.psel.nxv8i1( %p1, %p2, i32 %add) - ret %res + %res = call @llvm.aarch64.sve.psel.nxv8i1( %p1, %p2, i32 %add) + ret %res } -define @psel_s( %p1, %p2, i32 %idx) { +define @psel_s( %p1, %p2, i32 %idx) { ; CHECK-LABEL: psel_s: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: psel p0, p0, p1.s[w12, 0] ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sve.psel.nxv4i1( %p1, %p2, i32 %idx) - ret %res + %res = call @llvm.aarch64.sve.psel.nxv4i1( %p1, %p2, i32 %idx) + ret %res } -define @psel_s_imm( %p1, %p2, i32 %idx) { +define @psel_s_imm( %p1, %p2, i32 %idx) { ; CHECK-LABEL: psel_s_imm: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: psel p0, p0, p1.s[w12, 3] ; CHECK-NEXT: ret %add = add i32 %idx, 3 - %res = call @llvm.aarch64.sve.psel.nxv4i1( %p1, %p2, i32 %add) - ret %res + %res = call @llvm.aarch64.sve.psel.nxv4i1( %p1, %p2, i32 %add) + ret %res } -define @psel_d( %p1, %p2, i32 %idx) { +define @psel_d( %p1, %p2, i32 %idx) { ; CHECK-LABEL: psel_d: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: psel p0, p0, p1.d[w12, 0] ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sve.psel.nxv2i1( %p1, %p2, i32 %idx) - ret %res + %res = call @llvm.aarch64.sve.psel.nxv2i1( %p1, %p2, i32 %idx) + ret %res } -define @psel_d_imm( %p1, %p2, i32 %idx) { +define @psel_d_imm( %p1, %p2, i32 %idx) { ; CHECK-LABEL: psel_d_imm: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: psel p0, p0, p1.d[w12, 1] ; CHECK-NEXT: ret %add = add i32 %idx, 1 - %res = call @llvm.aarch64.sve.psel.nxv2i1( %p1, %p2, i32 %add) - ret %res + %res = call @llvm.aarch64.sve.psel.nxv2i1( %p1, %p2, i32 %add) + ret %res } declare 
@llvm.aarch64.sve.psel.nxv16i1(, , i32) -declare @llvm.aarch64.sve.psel.nxv8i1(, , i32) -declare @llvm.aarch64.sve.psel.nxv4i1(, , i32) -declare @llvm.aarch64.sve.psel.nxv2i1(, , i32) +declare @llvm.aarch64.sve.psel.nxv8i1(, , i32) +declare @llvm.aarch64.sve.psel.nxv4i1(, , i32) +declare @llvm.aarch64.sve.psel.nxv2i1(, , i32) diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-loads.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-loads.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-loads.ll @@ -0,0 +1,648 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2,+bf16 < %s | FileCheck %s + +; == Normal Multi-Vector Consecutive Loads == + +define { , } @ld1_x2_i8(target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ld1_x2_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ld1b { z0.b, z1.b }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") %pn, ptr %ptr); + ret { , } %res +} + +define { , } @ld1_x2_i16(target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ld1_x2_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ld1h { z0.h, z1.h }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount") %pn, ptr %ptr); + ret { , } %res +} + +define { , } @ld1_x2_i32(target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ld1_x2_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ld1w { z0.s, z1.s }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv4i32(target("aarch64.svcount") %pn, ptr %ptr); + ret { , } %res +} + +define { , } @ld1_x2_i64(target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ld1_x2_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ld1d { z0.d, z1.d }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv2i64(target("aarch64.svcount") %pn, ptr %ptr); + ret { , } %res +} + +define { , } @ld1_x2_f16(target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ld1_x2_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ld1h { z0.h, z1.h }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv8f16(target("aarch64.svcount") %pn, ptr %ptr); + ret { , } %res +} + +define { , } @ld1_x2_bf16(target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ld1_x2_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ld1h { z0.h, z1.h }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv8bf16(target("aarch64.svcount") %pn, ptr %ptr); + ret { , } %res +} + +define { , } @ld1_x2_f32(target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ld1_x2_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ld1w { z0.s, z1.s }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv4f32(target("aarch64.svcount") %pn, ptr %ptr); + ret { , } %res +} + +define { , } @ld1_x2_f64(target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ld1_x2_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ld1d { z0.d, z1.d }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv2f64(target("aarch64.svcount") %pn, ptr %ptr); + ret { , } %res +} + +; Test to ensure we load into the correct registers for the instruction +define @ld1_x2_i8_z0_taken(target("aarch64.svcount") %pn, ptr %ptr, %val) { +; CHECK-LABEL: ld1_x2_i8_z0_taken: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ld1b { z2.b, z3.b }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: add z0.b, z0.b, z2.b +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %ld1 = call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") %pn, ptr %ptr); + %ld1_0 = extractvalue { , } %ld1, 0 + %res = add %val, %ld1_0 + ret %res +} + +define { , , , } @ld1_x4_i8(target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ld1_x4_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ld1b { z0.b - z3.b }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %pn, ptr %ptr); + ret { , , , } %res +} + +define { , , , } @ld1_x4_i16(target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ld1_x4_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ld1h { z0.h - z3.h }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv8i16(target("aarch64.svcount") %pn, ptr %ptr); + ret { , , , } %res +} + +define { , , , } @ld1_x4_i32(target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ld1_x4_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ld1w { z0.s - z3.s }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv4i32(target("aarch64.svcount") %pn, ptr %ptr); + ret { , , , } %res +} + +define { , , , } @ld1_x4_i64(target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ld1_x4_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ld1d { z0.d - z3.d }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv2i64(target("aarch64.svcount") %pn, ptr %ptr); + ret { , , , } %res +} + +define { , , , } @ld1_x4_f16(target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ld1_x4_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ld1h { z0.h - z3.h }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv8f16(target("aarch64.svcount") %pn, ptr %ptr); + ret { , , , } %res +} + +define { , , , } @ld1_x4_bf16(target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ld1_x4_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ld1h { z0.h - z3.h }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv8bf16(target("aarch64.svcount") %pn, ptr %ptr); + ret { , , , } %res +} + +define { , , , } @ld1_x4_f32(target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ld1_x4_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ld1w { z0.s - z3.s }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") %pn, ptr %ptr); + ret { , , , } %res +} + +define { , , , } @ld1_x4_f64(target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ld1_x4_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ld1d { z0.d - z3.d }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv2f64(target("aarch64.svcount") %pn, ptr %ptr); + ret { , , , } %res +} + +; Test to ensure we load into the correct registers for the instruction +define @ld1_x4_i16_z0_taken(target("aarch64.svcount") %pn, ptr %ptr, %val) { +; CHECK-LABEL: ld1_x4_i16_z0_taken: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ld1h { z4.h - z7.h }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: add z0.h, z0.h, z4.h +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %ld1 = call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv8i16(target("aarch64.svcount") %pn, ptr %ptr); + %ld1_0 = extractvalue { , , , } %ld1, 0 + %res = add %val, %ld1_0 + ret %res +} + + +; == Non-temporal Multi-Vector Consecutive Loads == + +define { , } @ldnt1_x2_i8(target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ldnt1_x2_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ldnt1b { z0.b, z1.b }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") %pn, ptr %ptr); + ret { , } %res +} + +define { , } @ldnt1_x2_i16(target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ldnt1_x2_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ldnt1h { z0.h, z1.h }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8i16(target("aarch64.svcount") %pn, ptr %ptr); + ret { , } %res +} + +define { , } @ldnt1_x2_i32(target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ldnt1_x2_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ldnt1w { z0.s, z1.s }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv4i32(target("aarch64.svcount") %pn, ptr %ptr); + ret { , } %res +} + +define { , } @ldnt1_x2_i64(target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ldnt1_x2_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ldnt1d { z0.d, z1.d }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv2i64(target("aarch64.svcount") %pn, ptr %ptr); + ret { , } %res +} + +define { , } @ldnt1_x2_f16(target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ldnt1_x2_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ldnt1h { z0.h, z1.h }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8f16(target("aarch64.svcount") %pn, ptr %ptr); + ret { , } %res +} + +define { , } @ldnt1_x2_bf16(target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ldnt1_x2_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ldnt1h { z0.h, z1.h }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8bf16(target("aarch64.svcount") %pn, ptr %ptr); + ret { , } %res +} + +define { , } @ldnt1_x2_f32(target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ldnt1_x2_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ldnt1w { z0.s, z1.s }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv4f32(target("aarch64.svcount") %pn, ptr %ptr); + ret { , } %res +} + +define { , } @ldnt1_x2_f64(target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ldnt1_x2_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ldnt1d { z0.d, z1.d }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv2f64(target("aarch64.svcount") %pn, ptr %ptr); + ret { , } %res +} + +; Test to ensure we load into the correct registers for the instruction +define @ldnt1_x2_i32_z0_taken(target("aarch64.svcount") %pn, ptr %ptr, %val) { +; CHECK-LABEL: ldnt1_x2_i32_z0_taken: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ldnt1w { z2.s, z3.s }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: add z0.s, z0.s, z2.s +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %ld1 = call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv4i32(target("aarch64.svcount") %pn, ptr %ptr); + %ld1_0 = extractvalue { , } %ld1, 0 + %res = add %val, %ld1_0 + ret %res +} + +define { , , , } @ldnt1_x4_i8(target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ldnt1_x4_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ldnt1b { z0.b - z3.b }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") %pn, ptr %ptr); + ret { , , , } %res +} + +define { , , , } @ldnt1_x4_i16(target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ldnt1_x4_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ldnt1h { z0.h - z3.h }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8i16(target("aarch64.svcount") %pn, ptr %ptr); + ret { , , , } %res +} + +define { , , , } @ldnt1_x4_i32(target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ldnt1_x4_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ldnt1w { z0.s - z3.s }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4i32(target("aarch64.svcount") %pn, ptr %ptr); + ret { , , , } %res +} + +define { , , , } @ldnt1_x4_i64(target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ldnt1_x4_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ldnt1d { z0.d - z3.d }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2i64(target("aarch64.svcount") %pn, ptr %ptr); + ret { , , , } %res +} + +define { , , , } @ldnt1_x4_f16(target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ldnt1_x4_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ldnt1h { z0.h - z3.h }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8f16(target("aarch64.svcount") %pn, ptr %ptr); + ret { , , , } %res +} + +define { , , , } @ldnt1_x4_bf16(target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ldnt1_x4_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ldnt1h { z0.h - z3.h }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8bf16(target("aarch64.svcount") %pn, ptr %ptr); + ret { , , , } %res +} + +define { , , , } @ldnt1_x4_f32(target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ldnt1_x4_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ldnt1w { z0.s - z3.s }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4f32(target("aarch64.svcount") %pn, ptr %ptr); + ret { , , , } %res +} + +define { , , , } @ldnt1_x4_f64(target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ldnt1_x4_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ldnt1d { z0.d - z3.d }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2f64(target("aarch64.svcount") %pn, ptr %ptr); + ret { , , , } %res +} + +; Test to ensure we load into the correct registers for the instruction +define @ldnt1_x4_i64_z0_taken(target("aarch64.svcount") %pn, ptr %ptr, %val) { +; CHECK-LABEL: ldnt1_x4_i64_z0_taken: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ldnt1d { z4.d - z7.d }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: add z0.d, z0.d, z4.d +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %ld1 = call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2i64(target("aarch64.svcount") %pn, ptr %ptr); + %ld1_0 = extractvalue { , , , } %ld1, 0 + %res = add %val, %ld1_0 + ret %res +} + +declare { , } @llvm.aarch64.sve.ld1.pn.x2.nxv2i64(target("aarch64.svcount"), ptr) +declare { , } @llvm.aarch64.sve.ld1.pn.x2.nxv4i32(target("aarch64.svcount"), ptr) +declare { , } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount"), ptr) +declare { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount"), ptr) +declare { , } @llvm.aarch64.sve.ld1.pn.x2.nxv2f64(target("aarch64.svcount"), ptr) +declare { , } @llvm.aarch64.sve.ld1.pn.x2.nxv4f32(target("aarch64.svcount"), ptr) +declare { , } @llvm.aarch64.sve.ld1.pn.x2.nxv8f16(target("aarch64.svcount"), ptr) +declare { , } @llvm.aarch64.sve.ld1.pn.x2.nxv8bf16(target("aarch64.svcount"), ptr) + +declare { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv2i64(target("aarch64.svcount"), ptr) +declare { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv4i32(target("aarch64.svcount"), ptr) +declare { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv8i16(target("aarch64.svcount"), ptr) +declare { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount"), ptr) +declare { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv2f64(target("aarch64.svcount"), ptr) +declare { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount"), ptr) +declare 
{ , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv8f16(target("aarch64.svcount"), ptr) +declare { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv8bf16(target("aarch64.svcount"), ptr) + +declare { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv2i64(target("aarch64.svcount"), ptr) +declare { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv4i32(target("aarch64.svcount"), ptr) +declare { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8i16(target("aarch64.svcount"), ptr) +declare { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount"), ptr) +declare { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv2f64(target("aarch64.svcount"), ptr) +declare { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv4f32(target("aarch64.svcount"), ptr) +declare { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8f16(target("aarch64.svcount"), ptr) +declare { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8bf16(target("aarch64.svcount"), ptr) + +declare { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2i64(target("aarch64.svcount"), ptr) +declare { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4i32(target("aarch64.svcount"), ptr) +declare { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8i16(target("aarch64.svcount"), ptr) +declare { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount"), ptr) +declare { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2f64(target("aarch64.svcount"), ptr) +declare { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4f32(target("aarch64.svcount"), ptr) +declare { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8f16(target("aarch64.svcount"), ptr) +declare { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8bf16(target("aarch64.svcount"), ptr) diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx2.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx2.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx2.ll @@ -0,0 +1,174 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s + +; == 8 to 64-bit elements 
== + +define { , } @sel_x2_i8(target("aarch64.svcount") %pn, %unused, %zn1, %zn2, %zm1, %zm2) nounwind { +; CHECK-LABEL: sel_x2_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z5.d, z4.d +; CHECK-NEXT: mov z7.d, z2.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z4.d, z3.d +; CHECK-NEXT: mov z6.d, z1.d +; CHECK-NEXT: sel { z0.b, z1.b }, pn8, { z6.b, z7.b }, { z4.b, z5.b } +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.sel.x2.nxv16i8(target("aarch64.svcount") %pn, %zn1, %zn2, %zm1, %zm2) + ret { , } %res +} + +define { , } @sel_x2_i16(target("aarch64.svcount") %pn, %unused, %zn1, %zn2, %zm1, %zm2) nounwind { +; CHECK-LABEL: sel_x2_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z5.d, z4.d +; CHECK-NEXT: mov z7.d, z2.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z4.d, z3.d +; CHECK-NEXT: mov z6.d, z1.d +; CHECK-NEXT: sel { z0.h, z1.h }, pn8, { z6.h, z7.h }, { z4.h, z5.h } +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.sel.x2.nxv8i16(target("aarch64.svcount") %pn, %zn1, %zn2, %zm1, %zm2) + ret { , } %res +} + +define { , } @sel_x2_f16(target("aarch64.svcount") %pn, %unused, %zn1, %zn2, %zm1, %zm2) nounwind { +; CHECK-LABEL: sel_x2_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z5.d, z4.d +; CHECK-NEXT: mov z7.d, z2.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z4.d, z3.d +; CHECK-NEXT: mov z6.d, z1.d +; CHECK-NEXT: sel { z0.h, z1.h }, pn8, { z6.h, z7.h }, { z4.h, z5.h } +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.sel.x2.nxv8f16(target("aarch64.svcount") %pn, %zn1, %zn2, %zm1, %zm2) + ret { , } %res +} + +define { , } @sel_x2_bf16(target("aarch64.svcount") %pn, %unused, %zn1, %zn2, %zm1, %zm2) nounwind { +; CHECK-LABEL: sel_x2_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z5.d, z4.d +; CHECK-NEXT: mov z7.d, z2.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z4.d, z3.d +; CHECK-NEXT: mov z6.d, z1.d +; CHECK-NEXT: sel { z0.h, z1.h }, pn8, { z6.h, z7.h }, { z4.h, z5.h } +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.sel.x2.nxv8bf16(target("aarch64.svcount") %pn, %zn1, %zn2, %zm1, %zm2) + ret { , } %res +} + +define { , } @sel_x2_i32(target("aarch64.svcount") %pn, %unused, %zn1, %zn2, %zm1, %zm2) nounwind { +; CHECK-LABEL: sel_x2_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z5.d, z4.d +; CHECK-NEXT: mov z7.d, z2.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z4.d, z3.d +; CHECK-NEXT: mov z6.d, z1.d +; CHECK-NEXT: sel { z0.s, z1.s }, pn8, { z6.s, z7.s }, { z4.s, z5.s } +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.sel.x2.nxv4i32(target("aarch64.svcount") %pn, %zn1, %zn2, %zm1, %zm2) + ret { , } %res +} + +define { , } @sel_x2_f32(target("aarch64.svcount") %pn, %unused, %zn1, %zn2, %zm1, %zm2) nounwind { +; CHECK-LABEL: sel_x2_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z5.d, z4.d +; CHECK-NEXT: mov z7.d, z2.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z4.d, z3.d +; CHECK-NEXT: mov z6.d, z1.d +; CHECK-NEXT: sel { z0.s, z1.s }, pn8, { z6.s, z7.s }, { z4.s, z5.s } +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.sel.x2.nxv4f32(target("aarch64.svcount") %pn, %zn1, %zn2, %zm1, %zm2) + ret { , } %res +} + +define { , } @sel_x2_i64(target("aarch64.svcount") %pn, %unused, %zn1, %zn2, %zm1, %zm2) nounwind { +; CHECK-LABEL: sel_x2_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z5.d, z4.d +; CHECK-NEXT: mov z7.d, z2.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z4.d, z3.d +; CHECK-NEXT: mov z6.d, z1.d +; CHECK-NEXT: sel { z0.d, z1.d }, pn8, { z6.d, z7.d }, { z4.d, z5.d } +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.sel.x2.nxv2i64(target("aarch64.svcount") %pn, %zn1, %zn2, %zm1, %zm2) + ret { , } %res +} + +define { , } @sel_x2_f64(target("aarch64.svcount") %pn, %unused, %zn1, %zn2, %zm1, %zm2) nounwind { +; CHECK-LABEL: sel_x2_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z5.d, z4.d +; CHECK-NEXT: mov z7.d, z2.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z4.d, z3.d +; CHECK-NEXT: mov z6.d, z1.d +; CHECK-NEXT: sel { z0.d, z1.d }, pn8, { z6.d, z7.d }, { z4.d, z5.d } +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.sel.x2.nxv2f64(target("aarch64.svcount") %pn, %zn1, %zn2, %zm1, %zm2) + ret { , } %res +} + +; == 8 to 64-bit elements == +declare { , } @llvm.aarch64.sve.sel.x2.nxv16i8(target("aarch64.svcount") %pn, %zn1, %zn2, %zm1, %zm2) +declare { , } @llvm.aarch64.sve.sel.x2.nxv8i16(target("aarch64.svcount") %pn, %zn1, %zn2, %zm1, %zm2) +declare { , } @llvm.aarch64.sve.sel.x2.nxv4i32(target("aarch64.svcount") %pn, %zn1, %zn2, %zm1, %zm2) +declare { , } @llvm.aarch64.sve.sel.x2.nxv2i64(target("aarch64.svcount") %pn, %zn1, %zn2, %zm1, %zm2) +declare { , } @llvm.aarch64.sve.sel.x2.nxv8f16(target("aarch64.svcount") %pn, %zn1, 
%zn2, %zm1, %zm2) +declare { , } @llvm.aarch64.sve.sel.x2.nxv8bf16(target("aarch64.svcount") %pn, %zn1, %zn2, %zm1, %zm2) +declare { , } @llvm.aarch64.sve.sel.x2.nxv4f32(target("aarch64.svcount") %pn, %zn1, %zn2, %zm1, %zm2) +declare { , } @llvm.aarch64.sve.sel.x2.nxv2f64(target("aarch64.svcount") %pn, %zn1, %zn2, %zm1, %zm2) diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll @@ -0,0 +1,215 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s + +; == 8 to 64-bit elements == + +define { , , , } @sel_x4_i8(target("aarch64.svcount") %pn, %unused, %zn1, %zn2, %zn3, %zn4, %zm1, %zm2, %zm3, %zm4) nounwind { +; CHECK-LABEL: sel_x4_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: ptrue p1.b +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z31.d, z4.d +; CHECK-NEXT: ld1b { z27.b }, p1/z, [x0] +; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z29.d, z2.d +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: sel { z0.b - z3.b }, pn8, { z28.b - z31.b }, { z24.b - z27.b } +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.sel.x4.nxv16i8(target("aarch64.svcount") %pn, %zn1, %zn2, %zn3, %zn4, %zm1, %zm2, %zm3, %zm4) + ret { , , , } %res +} + +define { , , , } @sel_x4_i16(target("aarch64.svcount") %pn, %unused, %zn1, %zn2, %zn3, %zn4, %zm1, %zm2, %zm3, %zm4) nounwind { +; CHECK-LABEL: sel_x4_i16: +; CHECK: // 
%bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: ptrue p1.h +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z31.d, z4.d +; CHECK-NEXT: ld1h { z27.h }, p1/z, [x0] +; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z29.d, z2.d +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: sel { z0.h - z3.h }, pn8, { z28.h - z31.h }, { z24.h - z27.h } +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.sel.x4.nxv8i16(target("aarch64.svcount") %pn, %zn1, %zn2, %zn3, %zn4, %zm1, %zm2, %zm3, %zm4) + ret { , , , } %res +} + +define { , , , } @sel_x4_f16(target("aarch64.svcount") %pn, %unused, %zn1, %zn2, %zn3, %zn4, %zm1, %zm2, %zm3, %zm4) nounwind { +; CHECK-LABEL: sel_x4_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: ptrue p1.h +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z31.d, z4.d +; CHECK-NEXT: ld1h { z27.h }, p1/z, [x0] +; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z29.d, z2.d +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: sel { z0.h - z3.h }, pn8, { z28.h - z31.h }, { z24.h - z27.h } +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.sel.x4.nxv8f16(target("aarch64.svcount") %pn, %zn1, %zn2, %zn3, %zn4, %zm1, %zm2, %zm3, %zm4) + ret { , , , } %res +} + +define { , , , } @sel_x4_bf16(target("aarch64.svcount") %pn, %unused, %zn1, %zn2, %zn3, %zn4, %zm1, %zm2, %zm3, %zm4) nounwind { +; CHECK-LABEL: sel_x4_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: ptrue p1.h +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z31.d, z4.d +; CHECK-NEXT: ld1h { z27.h }, p1/z, [x0] +; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z29.d, z2.d +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: sel { z0.h - z3.h }, pn8, { z28.h - z31.h }, { z24.h - z27.h } +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.sel.x4.nxv8bf16(target("aarch64.svcount") %pn, %zn1, %zn2, %zn3, %zn4, %zm1, %zm2, %zm3, %zm4) + ret { , , , } %res +} + +define { , , , } @sel_x4_i32(target("aarch64.svcount") %pn, %unused, %zn1, %zn2, %zn3, %zn4, %zm1, %zm2, %zm3, %zm4) nounwind { +; CHECK-LABEL: sel_x4_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: ptrue p1.s +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z31.d, z4.d +; CHECK-NEXT: ld1w { z27.s }, p1/z, [x0] +; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z29.d, z2.d +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: sel { z0.s - z3.s }, pn8, { z28.s - z31.s }, { z24.s - z27.s } +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.sel.x4.nxv4i32(target("aarch64.svcount") %pn, %zn1, %zn2, %zn3, %zn4, %zm1, %zm2, %zm3, %zm4) + ret { , , , } %res +} + +define { , , , } @sel_x4_f32(target("aarch64.svcount") %pn, %unused, %zn1, %zn2, %zn3, %zn4, %zm1, %zm2, %zm3, %zm4) nounwind { +; CHECK-LABEL: sel_x4_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: ptrue p1.s +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z31.d, z4.d +; CHECK-NEXT: ld1w { z27.s }, p1/z, [x0] +; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z29.d, z2.d +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: sel { z0.s - z3.s }, pn8, { z28.s - z31.s }, { z24.s - z27.s } +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.sel.x4.nxv4f32(target("aarch64.svcount") %pn, %zn1, %zn2, %zn3, %zn4, %zm1, %zm2, %zm3, %zm4) + ret { , , , } %res +} + +define { , , , } @sel_x4_i64(target("aarch64.svcount") %pn, %unused, %zn1, %zn2, %zn3, %zn4, %zm1, %zm2, %zm3, %zm4) nounwind { +; CHECK-LABEL: sel_x4_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: ptrue p1.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z31.d, z4.d +; CHECK-NEXT: ld1d { z27.d }, p1/z, [x0] +; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z29.d, z2.d +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: sel { z0.d - z3.d }, pn8, { z28.d - z31.d }, { z24.d - z27.d } +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.sel.x4.nxv2i64(target("aarch64.svcount") %pn, %zn1, %zn2, %zn3, %zn4, %zm1, %zm2, %zm3, %zm4) + ret { , , , } %res +} + +define { , , , } @sel_x4_f64(target("aarch64.svcount") %pn, %unused, %zn1, %zn2, %zn3, %zn4, %zm1, %zm2, %zm3, %zm4) nounwind { +; CHECK-LABEL: sel_x4_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: ptrue p1.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z31.d, z4.d +; CHECK-NEXT: ld1d { z27.d }, p1/z, [x0] +; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z29.d, z2.d +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: sel { z0.d - z3.d }, pn8, { z28.d - z31.d }, { z24.d - z27.d } +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.sel.x4.nxv2f64(target("aarch64.svcount") %pn, %zn1, %zn2, %zn3, %zn4, %zm1, %zm2, %zm3, %zm4) + ret { , , , } %res +} + + +; == 8 to 64-bit elements == +declare { , , , } @llvm.aarch64.sve.sel.x4.nxv16i8(target("aarch64.svcount") %pn, %zn1, %zn2, %zn3, %zn4, %zm1, %zm2, %zm3, %zm4) +declare { , , , } @llvm.aarch64.sve.sel.x4.nxv8i16(target("aarch64.svcount") %pn, %zn1, %zn2, %zn3, %zn4, %zm1, %zm2, %zm3, %zm4) +declare { , , , } @llvm.aarch64.sve.sel.x4.nxv4i32(target("aarch64.svcount") %pn, %zn1, %zn2, %zn3, %zn4, %zm1, %zm2, %zm3, %zm4) +declare { , , , } @llvm.aarch64.sve.sel.x4.nxv2i64(target("aarch64.svcount") %pn, %zn1, %zn2, %zn3, %zn4, %zm1, %zm2, %zm3, %zm4) +declare { , , , } @llvm.aarch64.sve.sel.x4.nxv8f16(target("aarch64.svcount") %pn, %zn1, %zn2, %zn3, %zn4, %zm1, %zm2, %zm3, %zm4) +declare { , , , } @llvm.aarch64.sve.sel.x4.nxv8bf16(target("aarch64.svcount") %pn, %zn1, %zn2, %zn3, %zn4, %zm1, %zm2, %zm3, %zm4) +declare { , , , } @llvm.aarch64.sve.sel.x4.nxv4f32(target("aarch64.svcount") %pn, %zn1, %zn2, %zn3, %zn4, %zm1, %zm2, %zm3, %zm4) +declare { , , , } @llvm.aarch64.sve.sel.x4.nxv2f64(target("aarch64.svcount") %pn, %zn1, %zn2, %zn3, %zn4, %zm1, %zm2, %zm3, %zm4) diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-stores.ll 
b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-stores.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-stores.ll @@ -0,0 +1,650 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2,+bf16 < %s | FileCheck %s + +; == Normal Multi-Vector Consecutive Stores == + +define void @st1_x2_i8( %unused, %zn0, %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: st1_x2_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: st1b { z2.b, z3.b }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( %zn0, %zn1, target("aarch64.svcount") %pn, ptr %ptr); + ret void +} + +define void @st1_x2_i16( %unused, %zn0, %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: st1_x2_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: st1h { z2.h, z3.h }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.pn.x2.nxv8i16( %zn0, %zn1, target("aarch64.svcount") %pn, ptr %ptr); + ret void +} + +define void @st1_x2_i32( %unused, %zn0, %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: st1_x2_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: st1w { z2.s, z3.s }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.pn.x2.nxv4i32( %zn0, %zn1, target("aarch64.svcount") %pn, ptr %ptr); + ret void +} + +define void @st1_x2_i64( %unused, %zn0, %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: st1_x2_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: st1d { z2.d, z3.d }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.pn.x2.nxv2i64( %zn0, %zn1, target("aarch64.svcount") %pn, ptr %ptr); + ret void +} + +define void @st1_x2_f16( %unused, %zn0, %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: st1_x2_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: st1h { z2.h, z3.h }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.pn.x2.nxv8f16( %zn0, %zn1, target("aarch64.svcount") %pn, ptr %ptr); + ret void +} + +define void @st1_x2_bf16( %unused, %zn0, %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: st1_x2_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: st1h { z2.h, z3.h }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.pn.x2.nxv8bf16( %zn0, %zn1, target("aarch64.svcount") %pn, ptr %ptr); + ret void +} + +define void @st1_x2_f32( %unused, %zn0, %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: st1_x2_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: st1w { z2.s, z3.s }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.pn.x2.nxv4f32( %zn0, %zn1, target("aarch64.svcount") %pn, ptr %ptr); + ret void +} + +define void @st1_x2_f64( %unused, %zn0, %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: st1_x2_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: st1d { z2.d, z3.d }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.pn.x2.nxv2f64( %zn0, %zn1, target("aarch64.svcount") %pn, ptr %ptr); + ret void +} + +define void @st1_x4_i8( %unused, %zn0, %zn1, %zn2, %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: st1_x4_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: st1b { z4.b - z7.b }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( %zn0, %zn1, %zn2, %zn3, target("aarch64.svcount") %pn, ptr %ptr); + ret void +} + +define void @st1_x4_i16( %unused, %zn0, %zn1, %zn2, %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: st1_x4_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: st1h { z4.h - z7.h }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.pn.x4.nxv8i16( %zn0, %zn1, %zn2, %zn3, target("aarch64.svcount") %pn, ptr %ptr); + ret void +} + +define void @st1_x4_i32( %unused, %zn0, %zn1, %zn2, %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: st1_x4_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: st1w { z4.s - z7.s }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.pn.x4.nxv4i32( %zn0, %zn1, %zn2, %zn3, target("aarch64.svcount") %pn, ptr %ptr); + ret void +} + +define void @st1_x4_i64( %unused, %zn0, %zn1, %zn2, %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: st1_x4_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: st1d { z4.d - z7.d }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.pn.x4.nxv2i64( %zn0, %zn1, %zn2, %zn3, target("aarch64.svcount") %pn, ptr %ptr); + ret void +} + +define void @st1_x4_f16( %unused, %zn0, %zn1, %zn2, %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: st1_x4_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: st1h { z4.h - z7.h }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.pn.x4.nxv8f16( %zn0, %zn1, %zn2, %zn3, target("aarch64.svcount") %pn, ptr %ptr); + ret void +} + +define void @st1_x4_bf16( %unused, %zn0, %zn1, %zn2, %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: st1_x4_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: st1h { z4.h - z7.h }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.pn.x4.nxv8bf16( %zn0, %zn1, %zn2, %zn3, target("aarch64.svcount") %pn, ptr %ptr); + ret void +} + +define void @st1_x4_f32( %unused, %zn0, %zn1, %zn2, %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: st1_x4_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: st1w { z4.s - z7.s }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.pn.x4.nxv4f32( %zn0, %zn1, %zn2, %zn3, target("aarch64.svcount") %pn, ptr %ptr); + ret void +} + +define void @st1_x4_f64( %unused, %zn0, %zn1, %zn2, %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: st1_x4_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: st1d { z4.d - z7.d }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.pn.x4.nxv2f64( %zn0, %zn1, %zn2, %zn3, target("aarch64.svcount") %pn, ptr %ptr); + ret void +} + +; == Non-temporal Multi-Vector Consecutive Stores == + +define void @stnt1_x2_i8( %unused, %zn0, %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: stnt1_x2_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: stnt1b { z2.b, z3.b }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( %zn0, %zn1, target("aarch64.svcount") %pn, ptr %ptr); + ret void +} + +define void @stnt1_x2_i16( %unused, %zn0, %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: stnt1_x2_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: stnt1h { z2.h, z3.h }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16( %zn0, %zn1, target("aarch64.svcount") %pn, ptr %ptr); + ret void +} + +define void @stnt1_x2_i32( %unused, %zn0, %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: stnt1_x2_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: stnt1w { z2.s, z3.s }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4i32( %zn0, %zn1, target("aarch64.svcount") %pn, ptr %ptr); + ret void +} + +define void @stnt1_x2_i64( %unused, %zn0, %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: stnt1_x2_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: stnt1d { z2.d, z3.d }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2i64( %zn0, %zn1, target("aarch64.svcount") %pn, ptr %ptr); + ret void +} + +define void @stnt1_x2_f16( %unused, %zn0, %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: stnt1_x2_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: stnt1h { z2.h, z3.h }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8f16( %zn0, %zn1, target("aarch64.svcount") %pn, ptr %ptr); + ret void +} + +define void @stnt1_x2_bf16( %unused, %zn0, %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: stnt1_x2_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: stnt1h { z2.h, z3.h }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8bf16( %zn0, %zn1, target("aarch64.svcount") %pn, ptr %ptr); + ret void +} + +define void @stnt1_x2_f32( %unused, %zn0, %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: stnt1_x2_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: stnt1w { z2.s, z3.s }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4f32( %zn0, %zn1, target("aarch64.svcount") %pn, ptr %ptr); + ret void +} + +define void @stnt1_x2_f64( %unused, %zn0, %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: stnt1_x2_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: stnt1d { z2.d, z3.d }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2f64( %zn0, %zn1, target("aarch64.svcount") %pn, ptr %ptr); + ret void +} + +define void @stnt1_x4_i8( %unused, %zn0, %zn1, %zn2, %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: stnt1_x4_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: stnt1b { z4.b - z7.b }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( %zn0, %zn1, %zn2, %zn3, target("aarch64.svcount") %pn, ptr %ptr); + ret void +} + +define void @stnt1_x4_i16( %unused, %zn0, %zn1, %zn2, %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: stnt1_x4_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: stnt1h { z4.h - z7.h }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16( %zn0, %zn1, %zn2, %zn3, target("aarch64.svcount") %pn, ptr %ptr); + ret void +} + +define void @stnt1_x4_i32( %unused, %zn0, %zn1, %zn2, %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: stnt1_x4_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: stnt1w { z4.s - z7.s }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32( %zn0, %zn1, %zn2, %zn3, target("aarch64.svcount") %pn, ptr %ptr); + ret void +} + +define void @stnt1_x4_i64( %unused, %zn0, %zn1, %zn2, %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: stnt1_x4_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: stnt1d { z4.d - z7.d }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64( %zn0, %zn1, %zn2, %zn3, target("aarch64.svcount") %pn, ptr %ptr); + ret void +} + +define void @stnt1_x4_f16( %unused, %zn0, %zn1, %zn2, %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: stnt1_x4_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: stnt1h { z4.h - z7.h }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8f16( %zn0, %zn1, %zn2, %zn3, target("aarch64.svcount") %pn, ptr %ptr); + ret void +} + +define void @stnt1_x4_bf16( %unused, %zn0, %zn1, %zn2, %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: stnt1_x4_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: stnt1h { z4.h - z7.h }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8bf16( %zn0, %zn1, %zn2, %zn3, target("aarch64.svcount") %pn, ptr %ptr); + ret void +} + +define void @stnt1_x4_f32( %unused, %zn0, %zn1, %zn2, %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: stnt1_x4_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: stnt1w { z4.s - z7.s }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4f32( %zn0, %zn1, %zn2, %zn3, target("aarch64.svcount") %pn, ptr %ptr); + ret void +} + +define void @stnt1_x4_f64( %unused, %zn0, %zn1, %zn2, %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind { +; CHECK-LABEL: stnt1_x4_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: stnt1d { z4.d - z7.d }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2f64( %zn0, %zn1, %zn2, %zn3, target("aarch64.svcount") %pn, ptr %ptr); + ret void +} + +declare void @llvm.aarch64.sve.st1.pn.x2.nxv16i8(, , target("aarch64.svcount"), ptr) +declare void @llvm.aarch64.sve.st1.pn.x2.nxv8i16(, , target("aarch64.svcount"), ptr) +declare void @llvm.aarch64.sve.st1.pn.x2.nxv4i32(, , target("aarch64.svcount"), ptr) +declare void @llvm.aarch64.sve.st1.pn.x2.nxv2i64(, , target("aarch64.svcount"), ptr) +declare void @llvm.aarch64.sve.st1.pn.x2.nxv8f16(, , target("aarch64.svcount"), ptr) +declare void @llvm.aarch64.sve.st1.pn.x2.nxv8bf16(, , target("aarch64.svcount"), ptr) +declare void @llvm.aarch64.sve.st1.pn.x2.nxv4f32(, , target("aarch64.svcount"), ptr) +declare void @llvm.aarch64.sve.st1.pn.x2.nxv2f64(, , target("aarch64.svcount"), ptr) +declare void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8(, , target("aarch64.svcount"), ptr) +declare void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16(, , target("aarch64.svcount"), ptr) +declare void @llvm.aarch64.sve.stnt1.pn.x2.nxv4i32(, , target("aarch64.svcount"), ptr) +declare void @llvm.aarch64.sve.stnt1.pn.x2.nxv2i64(, , target("aarch64.svcount"), ptr) +declare void @llvm.aarch64.sve.stnt1.pn.x2.nxv8f16(, , target("aarch64.svcount"), ptr) +declare void @llvm.aarch64.sve.stnt1.pn.x2.nxv8bf16(, , target("aarch64.svcount"), ptr) +declare void @llvm.aarch64.sve.stnt1.pn.x2.nxv4f32(, , target("aarch64.svcount"), ptr) +declare void @llvm.aarch64.sve.stnt1.pn.x2.nxv2f64(, , 
target("aarch64.svcount"), ptr) + + +declare void @llvm.aarch64.sve.st1.pn.x4.nxv16i8(, , , , target("aarch64.svcount"), ptr) +declare void @llvm.aarch64.sve.st1.pn.x4.nxv8i16(, , , , target("aarch64.svcount"), ptr) +declare void @llvm.aarch64.sve.st1.pn.x4.nxv4i32(, , , , target("aarch64.svcount"), ptr) +declare void @llvm.aarch64.sve.st1.pn.x4.nxv2i64(, , , , target("aarch64.svcount"), ptr) +declare void @llvm.aarch64.sve.st1.pn.x4.nxv8f16(, , , , target("aarch64.svcount"), ptr) +declare void @llvm.aarch64.sve.st1.pn.x4.nxv8bf16(, , , , target("aarch64.svcount"), ptr) +declare void @llvm.aarch64.sve.st1.pn.x4.nxv4f32(, , , , target("aarch64.svcount"), ptr) +declare void @llvm.aarch64.sve.st1.pn.x4.nxv2f64(, , , , target("aarch64.svcount"), ptr) +declare void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8(, , , , target("aarch64.svcount"), ptr) +declare void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16(, , , , target("aarch64.svcount"), ptr) +declare void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32(, , , , target("aarch64.svcount"), ptr) +declare void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64(, , , , target("aarch64.svcount"), ptr) +declare void @llvm.aarch64.sve.stnt1.pn.x4.nxv8f16(, , , , target("aarch64.svcount"), ptr) +declare void @llvm.aarch64.sve.stnt1.pn.x4.nxv8bf16(, , , , target("aarch64.svcount"), ptr) +declare void @llvm.aarch64.sve.stnt1.pn.x4.nxv4f32(, , , , target("aarch64.svcount"), ptr) +declare void @llvm.aarch64.sve.stnt1.pn.x4.nxv2f64(, , , , target("aarch64.svcount"), ptr) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll @@ -12,19 +12,19 @@ ; CHECK: then: ; CHECK-NEXT: [[X:%.*]] = insertelement <5 x double> [[IN:%.*]], double 3.140000e+00, i64 3 ; CHECK-NEXT: 
[[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <5 x double> [[X]], i64 0 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[X]], i64 1 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[X]], i64 2 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[X]], i64 3 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[X]], i64 4 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[X]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[X]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE6:%.*]] = extractelement <5 x double> [[X]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE8:%.*]] = extractelement <5 x double> [[X]], i64 4 ; CHECK-NEXT: br label [[FINALLY:%.*]] ; CHECK: else: ; CHECK-NEXT: br label [[FINALLY]] ; CHECK: finally: ; CHECK-NEXT: [[TMP0:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ 3.140000e+00, [[ELSE]] ] -; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE1]], [[THEN]] ], [ 9.900000e+00, [[ELSE]] ] -; CHECK-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ 6.140000e+00, [[ELSE]] ] -; CHECK-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE3]], [[THEN]] ], [ poison, [[ELSE]] ] -; CHECK-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ 9.900000e+00, [[ELSE]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ 6.140000e+00, [[ELSE]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE6]], [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE8]], [[THEN]] ], [ poison, [[ELSE]] ] ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <5 x double> poison, double [[TMP0]], i64 0 ; CHECK-NEXT: 
[[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE0]], double [[TMP1]], i64 1 ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE1]], double [[TMP2]], i64 2 @@ -57,19 +57,19 @@ ; CHECK: then: ; CHECK-NEXT: [[X:%.*]] = insertelement <5 x double> [[IN:%.*]], double 3.140000e+00, i64 3 ; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <5 x double> [[X]], i64 0 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[X]], i64 1 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[X]], i64 2 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[X]], i64 3 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[X]], i64 4 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[X]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[X]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE6:%.*]] = extractelement <5 x double> [[X]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE8:%.*]] = extractelement <5 x double> [[X]], i64 4 ; CHECK-NEXT: br label [[FINALLY:%.*]] ; CHECK: else: ; CHECK-NEXT: br label [[FINALLY]] ; CHECK: finally: ; CHECK-NEXT: [[TMP0:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ 0.000000e+00, [[ELSE]] ] -; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE1]], [[THEN]] ], [ 0.000000e+00, [[ELSE]] ] -; CHECK-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ 0.000000e+00, [[ELSE]] ] -; CHECK-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE3]], [[THEN]] ], [ 0.000000e+00, [[ELSE]] ] -; CHECK-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ 0.000000e+00, [[ELSE]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ 0.000000e+00, [[ELSE]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], 
[[THEN]] ], [ 0.000000e+00, [[ELSE]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE6]], [[THEN]] ], [ 0.000000e+00, [[ELSE]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE8]], [[THEN]] ], [ 0.000000e+00, [[ELSE]] ] ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <5 x double> poison, double [[TMP0]], i64 0 ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE0]], double [[TMP1]], i64 1 ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE1]], double [[TMP2]], i64 2 @@ -101,19 +101,19 @@ ; CHECK: then: ; CHECK-NEXT: [[X:%.*]] = insertelement <5 x double> [[IN:%.*]], double 3.140000e+00, i64 3 ; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <5 x double> [[X]], i64 0 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[X]], i64 1 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[X]], i64 2 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[X]], i64 3 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[X]], i64 4 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[X]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[X]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE6:%.*]] = extractelement <5 x double> [[X]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE8:%.*]] = extractelement <5 x double> [[X]], i64 4 ; CHECK-NEXT: br label [[FINALLY:%.*]] ; CHECK: else: ; CHECK-NEXT: br label [[FINALLY]] ; CHECK: finally: ; CHECK-NEXT: [[TMP0:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ poison, [[ELSE]] ] -; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE1]], [[THEN]] ], [ poison, [[ELSE]] ] -; CHECK-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ poison, [[ELSE]] ] -; CHECK-NEXT: [[TMP3:%.*]] = phi double [ 
[[LARGEPHI_EXTRACTSLICE3]], [[THEN]] ], [ poison, [[ELSE]] ] -; CHECK-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE6]], [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE8]], [[THEN]] ], [ poison, [[ELSE]] ] ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <5 x double> poison, double [[TMP0]], i64 0 ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE0]], double [[TMP1]], i64 1 ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE1]], double [[TMP2]], i64 2 @@ -145,25 +145,25 @@ ; CHECK: then: ; CHECK-NEXT: [[X:%.*]] = insertelement <5 x double> [[IN:%.*]], double 3.140000e+00, i64 3 ; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <5 x double> [[X]], i64 0 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[X]], i64 1 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[X]], i64 2 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[X]], i64 3 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[X]], i64 4 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[X]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[X]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE6:%.*]] = extractelement <5 x double> [[X]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE8:%.*]] = extractelement <5 x double> [[X]], i64 4 ; CHECK-NEXT: br label [[FINALLY:%.*]] ; CHECK: else: ; CHECK-NEXT: [[SHUFFLED:%.*]] = shufflevector <5 x double> [[IN]], <5 x double> 
poison, <5 x i32> -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE01:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 0 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE12:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 1 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE23:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 2 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE34:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 3 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE45:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 4 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 0 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE5:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE7:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE9:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 4 ; CHECK-NEXT: br label [[FINALLY]] ; CHECK: finally: -; CHECK-NEXT: [[TMP0:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE01]], [[ELSE]] ] -; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE1]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE12]], [[ELSE]] ] -; CHECK-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE23]], [[ELSE]] ] -; CHECK-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE3]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE34]], [[ELSE]] ] -; CHECK-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE45]], [[ELSE]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE1]], [[ELSE]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE3]], [[ELSE]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ 
[[LARGEPHI_EXTRACTSLICE5]], [[ELSE]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE6]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE7]], [[ELSE]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE8]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE9]], [[ELSE]] ] ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <5 x double> poison, double [[TMP0]], i64 0 ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE0]], double [[TMP1]], i64 1 ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE1]], double [[TMP2]], i64 2 @@ -196,26 +196,26 @@ ; CHECK: then: ; CHECK-NEXT: [[X:%.*]] = insertelement <5 x double> [[IN:%.*]], double 3.140000e+00, i64 3 ; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <5 x double> [[X]], i64 0 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[X]], i64 1 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[X]], i64 2 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[X]], i64 3 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[X]], i64 4 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[X]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[X]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE6:%.*]] = extractelement <5 x double> [[X]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE8:%.*]] = extractelement <5 x double> [[X]], i64 4 ; CHECK-NEXT: br label [[FINALLY:%.*]] ; CHECK: else: ; CHECK-NEXT: [[LOCAL_SHUFFLE_SRC:%.*]] = insertelement <5 x double> [[IN]], double 3.250000e+00, i64 2 ; CHECK-NEXT: [[SHUFFLED:%.*]] = shufflevector <5 x double> [[LOCAL_SHUFFLE_SRC]], <5 x double> [[IN]], <5 x i32> -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE01:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 0 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE12:%.*]] = extractelement 
<5 x double> [[SHUFFLED]], i64 1 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE23:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 2 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE34:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 3 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE45:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 4 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 0 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE5:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE7:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE9:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 4 ; CHECK-NEXT: br label [[FINALLY]] ; CHECK: finally: -; CHECK-NEXT: [[TMP0:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE01]], [[ELSE]] ] -; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE1]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE12]], [[ELSE]] ] -; CHECK-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE23]], [[ELSE]] ] -; CHECK-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE3]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE34]], [[ELSE]] ] -; CHECK-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE45]], [[ELSE]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE1]], [[ELSE]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE3]], [[ELSE]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE5]], [[ELSE]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE6]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE7]], [[ELSE]] ] +; CHECK-NEXT: 
[[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE8]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE9]], [[ELSE]] ] ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <5 x double> poison, double [[TMP0]], i64 0 ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE0]], double [[TMP1]], i64 1 ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE1]], double [[TMP2]], i64 2 @@ -249,26 +249,26 @@ ; CHECK: then: ; CHECK-NEXT: [[X:%.*]] = insertelement <5 x double> [[IN:%.*]], double 3.140000e+00, i64 3 ; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <5 x double> [[X]], i64 0 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[X]], i64 1 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[X]], i64 2 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[X]], i64 3 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[X]], i64 4 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[X]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[X]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE6:%.*]] = extractelement <5 x double> [[X]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE8:%.*]] = extractelement <5 x double> [[X]], i64 4 ; CHECK-NEXT: br label [[FINALLY:%.*]] ; CHECK: else: ; CHECK-NEXT: [[LOCAL_SHUFFLE_SRC:%.*]] = insertelement <5 x double> [[IN]], double 3.250000e+00, i64 2 ; CHECK-NEXT: [[SHUFFLED:%.*]] = shufflevector <5 x double> [[IN]], <5 x double> [[LOCAL_SHUFFLE_SRC]], <5 x i32> -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE01:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 0 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE12:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 1 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE23:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 2 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE34:%.*]] = 
extractelement <5 x double> [[SHUFFLED]], i64 3 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE45:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 4 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 0 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE5:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE7:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE9:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 4 ; CHECK-NEXT: br label [[FINALLY]] ; CHECK: finally: -; CHECK-NEXT: [[TMP0:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE01]], [[ELSE]] ] -; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE1]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE12]], [[ELSE]] ] -; CHECK-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE23]], [[ELSE]] ] -; CHECK-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE3]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE34]], [[ELSE]] ] -; CHECK-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE45]], [[ELSE]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE1]], [[ELSE]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE3]], [[ELSE]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE5]], [[ELSE]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE6]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE7]], [[ELSE]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE8]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE9]], [[ELSE]] ] ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <5 x double> 
poison, double [[TMP0]], i64 0 ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE0]], double [[TMP1]], i64 1 ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE1]], double [[TMP2]], i64 2 @@ -340,24 +340,24 @@ ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <5 x double> [[TMP1]], double [[Z:%.*]], i64 3 ; CHECK-NEXT: [[X_4:%.*]] = insertelement <5 x double> [[TMP2]], double [[X]], i64 4 ; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <5 x double> [[X_4]], i64 0 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[X_4]], i64 1 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[X_4]], i64 2 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[X_4]], i64 3 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[X_4]], i64 4 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[X_4]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[X_4]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE6:%.*]] = extractelement <5 x double> [[X_4]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE8:%.*]] = extractelement <5 x double> [[X_4]], i64 4 ; CHECK-NEXT: br label [[FINALLY:%.*]] ; CHECK: else: -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE01:%.*]] = extractelement <5 x double> [[IN:%.*]], i64 0 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE12:%.*]] = extractelement <5 x double> [[IN]], i64 1 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE23:%.*]] = extractelement <5 x double> [[IN]], i64 2 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE34:%.*]] = extractelement <5 x double> [[IN]], i64 3 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE45:%.*]] = extractelement <5 x double> [[IN]], i64 4 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[IN:%.*]], i64 0 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[IN]], i64 1 +; 
CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE5:%.*]] = extractelement <5 x double> [[IN]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE7:%.*]] = extractelement <5 x double> [[IN]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE9:%.*]] = extractelement <5 x double> [[IN]], i64 4 ; CHECK-NEXT: br label [[FINALLY]] ; CHECK: finally: -; CHECK-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE01]], [[ELSE]] ] -; CHECK-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE1]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE12]], [[ELSE]] ] -; CHECK-NEXT: [[TMP5:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE23]], [[ELSE]] ] -; CHECK-NEXT: [[TMP6:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE3]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE34]], [[ELSE]] ] -; CHECK-NEXT: [[TMP7:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE45]], [[ELSE]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE1]], [[ELSE]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE3]], [[ELSE]] ] +; CHECK-NEXT: [[TMP5:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE5]], [[ELSE]] ] +; CHECK-NEXT: [[TMP6:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE6]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE7]], [[ELSE]] ] +; CHECK-NEXT: [[TMP7:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE8]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE9]], [[ELSE]] ] ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <5 x double> poison, double [[TMP3]], i64 0 ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE0]], double [[TMP4]], i64 1 ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE1]], double [[TMP5]], i64 2 @@ -394,24 +394,24 @@ ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <5 x double> , 
double [[X:%.*]], i64 3 ; CHECK-NEXT: [[X_4:%.*]] = insertelement <5 x double> [[TMP0]], double [[X]], i64 4 ; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <5 x double> [[X_4]], i64 0 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[X_4]], i64 1 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[X_4]], i64 2 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[X_4]], i64 3 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[X_4]], i64 4 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[X_4]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[X_4]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE6:%.*]] = extractelement <5 x double> [[X_4]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE8:%.*]] = extractelement <5 x double> [[X_4]], i64 4 ; CHECK-NEXT: br label [[FINALLY:%.*]] ; CHECK: else: -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE01:%.*]] = extractelement <5 x double> [[IN:%.*]], i64 0 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE12:%.*]] = extractelement <5 x double> [[IN]], i64 1 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE23:%.*]] = extractelement <5 x double> [[IN]], i64 2 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE34:%.*]] = extractelement <5 x double> [[IN]], i64 3 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE45:%.*]] = extractelement <5 x double> [[IN]], i64 4 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[IN:%.*]], i64 0 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[IN]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE5:%.*]] = extractelement <5 x double> [[IN]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE7:%.*]] = extractelement <5 x double> [[IN]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE9:%.*]] = extractelement <5 x double> [[IN]], i64 4 ; CHECK-NEXT: br label [[FINALLY]] ; CHECK: finally: -; CHECK-NEXT: [[TMP1:%.*]] = phi double [ 
[[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE01]], [[ELSE]] ] -; CHECK-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE1]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE12]], [[ELSE]] ] -; CHECK-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE23]], [[ELSE]] ] -; CHECK-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE3]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE34]], [[ELSE]] ] -; CHECK-NEXT: [[TMP5:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE45]], [[ELSE]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE1]], [[ELSE]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE3]], [[ELSE]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE5]], [[ELSE]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE6]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE7]], [[ELSE]] ] +; CHECK-NEXT: [[TMP5:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE8]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE9]], [[ELSE]] ] ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <5 x double> poison, double [[TMP1]], i64 0 ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE0]], double [[TMP2]], i64 1 ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE1]], double [[TMP3]], i64 2 @@ -446,24 +446,24 @@ ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <5 x double> [[X_1]], <5 x double> , <5 x i32> ; CHECK-NEXT: [[X_4:%.*]] = insertelement <5 x double> [[TMP0]], double [[X]], i64 2 ; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <5 x double> [[X_4]], i64 0 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[X_4]], i64 1 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x 
double> [[X_4]], i64 2 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[X_4]], i64 3 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[X_4]], i64 4 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[X_4]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[X_4]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE6:%.*]] = extractelement <5 x double> [[X_4]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE8:%.*]] = extractelement <5 x double> [[X_4]], i64 4 ; CHECK-NEXT: br label [[FINALLY:%.*]] ; CHECK: else: -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE01:%.*]] = extractelement <5 x double> [[IN:%.*]], i64 0 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE12:%.*]] = extractelement <5 x double> [[IN]], i64 1 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE23:%.*]] = extractelement <5 x double> [[IN]], i64 2 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE34:%.*]] = extractelement <5 x double> [[IN]], i64 3 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE45:%.*]] = extractelement <5 x double> [[IN]], i64 4 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[IN:%.*]], i64 0 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[IN]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE5:%.*]] = extractelement <5 x double> [[IN]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE7:%.*]] = extractelement <5 x double> [[IN]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE9:%.*]] = extractelement <5 x double> [[IN]], i64 4 ; CHECK-NEXT: br label [[FINALLY]] ; CHECK: finally: -; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE01]], [[ELSE]] ] -; CHECK-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE1]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE12]], [[ELSE]] ] -; CHECK-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE23]], [[ELSE]] ] -; CHECK-NEXT: [[TMP4:%.*]] = phi 
double [ [[LARGEPHI_EXTRACTSLICE3]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE34]], [[ELSE]] ] -; CHECK-NEXT: [[TMP5:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE45]], [[ELSE]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE1]], [[ELSE]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE3]], [[ELSE]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE5]], [[ELSE]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE6]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE7]], [[ELSE]] ] +; CHECK-NEXT: [[TMP5:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE8]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE9]], [[ELSE]] ] ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <5 x double> poison, double [[TMP1]], i64 0 ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE0]], double [[TMP2]], i64 1 ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE1]], double [[TMP3]], i64 2 @@ -571,19 +571,19 @@ ; CHECK: then: ; CHECK-NEXT: [[X:%.*]] = insertelement <5 x double> [[IN:%.*]], double 3.140000e+00, i64 3 ; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <5 x double> [[X]], i64 0 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[X]], i64 1 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[X]], i64 2 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[X]], i64 3 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[X]], i64 4 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[X]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[X]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE6:%.*]] = extractelement 
<5 x double> [[X]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE8:%.*]] = extractelement <5 x double> [[X]], i64 4 ; CHECK-NEXT: br label [[FINALLY:%.*]] ; CHECK: else: ; CHECK-NEXT: br label [[FINALLY]] ; CHECK: finally: ; CHECK-NEXT: [[TMP0:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ 0.000000e+00, [[ELSE]] ] -; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE1]], [[THEN]] ], [ 0.000000e+00, [[ELSE]] ] -; CHECK-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ 0.000000e+00, [[ELSE]] ] -; CHECK-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE3]], [[THEN]] ], [ 0.000000e+00, [[ELSE]] ] -; CHECK-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ 0.000000e+00, [[ELSE]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ 0.000000e+00, [[ELSE]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ 0.000000e+00, [[ELSE]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE6]], [[THEN]] ], [ 0.000000e+00, [[ELSE]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE8]], [[THEN]] ], [ 0.000000e+00, [[ELSE]] ] ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <5 x double> poison, double [[TMP0]], i64 0 ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE0]], double [[TMP1]], i64 1 ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE1]], double [[TMP2]], i64 2 @@ -593,17 +593,17 @@ ; CHECK-NEXT: br i1 [[COND2:%.*]], label [[THEN1:%.*]], label [[END:%.*]] ; CHECK: then1: ; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE01:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 0 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE12:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 1 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE23:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 
2 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE34:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 3 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE45:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 4 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE22:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE43:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE64:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE85:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 4 ; CHECK-NEXT: br label [[END]] ; CHECK: end: ; CHECK-NEXT: [[TMP5:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE01]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ] -; CHECK-NEXT: [[TMP6:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE12]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ] -; CHECK-NEXT: [[TMP7:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE23]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ] -; CHECK-NEXT: [[TMP8:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE34]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ] -; CHECK-NEXT: [[TMP9:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE45]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE22]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ] +; CHECK-NEXT: [[TMP7:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE43]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ] +; CHECK-NEXT: [[TMP8:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE64]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ] +; CHECK-NEXT: [[TMP9:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE85]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ] ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE06:%.*]] = insertelement <5 x double> poison, double [[TMP5]], i64 0 ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE17:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE06]], double [[TMP6]], i64 1 ; CHECK-NEXT: 
[[LARGEPHI_INSERTSLICE28:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE17]], double [[TMP7]], i64 2 diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis.ll --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare -amdgpu-codegenprepare-force-break-large-phis %s | FileCheck %s --check-prefixes=OPT +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare -amdgpu-codegenprepare-force-break-large-phis -verify %s | FileCheck %s --check-prefixes=OPT ; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare --global-isel %s | FileCheck %s --check-prefixes=NOOPT ; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare -amdgpu-codegenprepare-break-large-phis=0 %s | FileCheck %s --check-prefixes=NOOPT @@ -9,32 +9,32 @@ ; OPT-NEXT: br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]] ; OPT: then: ; OPT-NEXT: [[X:%.*]] = insertelement <5 x double> [[IN:%.*]], double 3.140000e+00, i32 3 -; OPT-NEXT: [[TMP0:%.*]] = extractelement <5 x double> [[X]], i64 0 -; OPT-NEXT: [[TMP1:%.*]] = extractelement <5 x double> [[X]], i64 1 -; OPT-NEXT: [[TMP2:%.*]] = extractelement <5 x double> [[X]], i64 2 -; OPT-NEXT: [[TMP3:%.*]] = extractelement <5 x double> [[X]], i64 3 -; OPT-NEXT: [[TMP4:%.*]] = extractelement <5 x double> [[X]], i64 4 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <5 x double> [[X]], i64 0 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[X]], i64 1 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[X]], i64 2 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE6:%.*]] = extractelement <5 x double> [[X]], i64 3 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE8:%.*]] = extractelement <5 x double> [[X]], 
i64 4 ; OPT-NEXT: br label [[FINALLY:%.*]] ; OPT: else: ; OPT-NEXT: [[Y:%.*]] = insertelement <5 x double> [[IN]], double 9.140000e+00, i32 2 -; OPT-NEXT: [[TMP5:%.*]] = extractelement <5 x double> [[Y]], i64 0 -; OPT-NEXT: [[TMP6:%.*]] = extractelement <5 x double> [[Y]], i64 1 -; OPT-NEXT: [[TMP7:%.*]] = extractelement <5 x double> [[Y]], i64 2 -; OPT-NEXT: [[TMP8:%.*]] = extractelement <5 x double> [[Y]], i64 3 -; OPT-NEXT: [[TMP9:%.*]] = extractelement <5 x double> [[Y]], i64 4 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[Y]], i64 0 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[Y]], i64 1 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE5:%.*]] = extractelement <5 x double> [[Y]], i64 2 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE7:%.*]] = extractelement <5 x double> [[Y]], i64 3 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE9:%.*]] = extractelement <5 x double> [[Y]], i64 4 ; OPT-NEXT: br label [[FINALLY]] ; OPT: finally: -; OPT-NEXT: [[TMP10:%.*]] = phi double [ [[TMP0]], [[THEN]] ], [ [[TMP5]], [[ELSE]] ] -; OPT-NEXT: [[TMP11:%.*]] = phi double [ [[TMP1]], [[THEN]] ], [ [[TMP6]], [[ELSE]] ] -; OPT-NEXT: [[TMP12:%.*]] = phi double [ [[TMP2]], [[THEN]] ], [ [[TMP7]], [[ELSE]] ] -; OPT-NEXT: [[TMP13:%.*]] = phi double [ [[TMP3]], [[THEN]] ], [ [[TMP8]], [[ELSE]] ] -; OPT-NEXT: [[TMP14:%.*]] = phi double [ [[TMP4]], [[THEN]] ], [ [[TMP9]], [[ELSE]] ] -; OPT-NEXT: [[TMP15:%.*]] = insertelement <5 x double> poison, double [[TMP10]], i64 0 -; OPT-NEXT: [[TMP16:%.*]] = insertelement <5 x double> [[TMP15]], double [[TMP11]], i64 1 -; OPT-NEXT: [[TMP17:%.*]] = insertelement <5 x double> [[TMP16]], double [[TMP12]], i64 2 -; OPT-NEXT: [[TMP18:%.*]] = insertelement <5 x double> [[TMP17]], double [[TMP13]], i64 3 -; OPT-NEXT: [[TMP19:%.*]] = insertelement <5 x double> [[TMP18]], double [[TMP14]], i64 4 -; OPT-NEXT: store <5 x double> [[TMP19]], ptr [[OUT:%.*]], align 1 +; OPT-NEXT: [[TMP0:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], 
[[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE1]], [[ELSE]] ] +; OPT-NEXT: [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE3]], [[ELSE]] ] +; OPT-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE5]], [[ELSE]] ] +; OPT-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE6]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE7]], [[ELSE]] ] +; OPT-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE8]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE9]], [[ELSE]] ] +; OPT-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <5 x double> poison, double [[TMP0]], i64 0 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE0]], double [[TMP1]], i64 1 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE1]], double [[TMP2]], i64 2 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE3:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE2]], double [[TMP3]], i64 3 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE4:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE3]], double [[TMP4]], i64 4 +; OPT-NEXT: store <5 x double> [[LARGEPHI_INSERTSLICE4]], ptr [[OUT:%.*]], align 1 ; OPT-NEXT: ret void ; ; NOOPT-LABEL: @phi_v5f64( @@ -71,40 +71,40 @@ ; OPT-NEXT: br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]] ; OPT: then: ; OPT-NEXT: [[X:%.*]] = insertelement <7 x double> [[IN:%.*]], double 3.140000e+00, i32 3 -; OPT-NEXT: [[TMP0:%.*]] = extractelement <7 x double> [[X]], i64 0 -; OPT-NEXT: [[TMP1:%.*]] = extractelement <7 x double> [[X]], i64 1 -; OPT-NEXT: [[TMP2:%.*]] = extractelement <7 x double> [[X]], i64 2 -; OPT-NEXT: [[TMP3:%.*]] = extractelement <7 x double> [[X]], i64 3 -; OPT-NEXT: [[TMP4:%.*]] = extractelement <7 x double> [[X]], i64 4 -; OPT-NEXT: [[TMP5:%.*]] = extractelement <7 x double> [[X]], i64 5 -; OPT-NEXT: [[TMP6:%.*]] = extractelement <7 x double> [[X]], i64 6 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE0:%.*]] = 
extractelement <7 x double> [[X]], i64 0 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <7 x double> [[X]], i64 1 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <7 x double> [[X]], i64 2 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE6:%.*]] = extractelement <7 x double> [[X]], i64 3 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE8:%.*]] = extractelement <7 x double> [[X]], i64 4 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE10:%.*]] = extractelement <7 x double> [[X]], i64 5 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE12:%.*]] = extractelement <7 x double> [[X]], i64 6 ; OPT-NEXT: br label [[FINALLY:%.*]] ; OPT: else: ; OPT-NEXT: [[Y:%.*]] = insertelement <7 x double> [[IN]], double 9.140000e+00, i32 6 -; OPT-NEXT: [[TMP7:%.*]] = extractelement <7 x double> [[Y]], i64 0 -; OPT-NEXT: [[TMP8:%.*]] = extractelement <7 x double> [[Y]], i64 1 -; OPT-NEXT: [[TMP9:%.*]] = extractelement <7 x double> [[Y]], i64 2 -; OPT-NEXT: [[TMP10:%.*]] = extractelement <7 x double> [[Y]], i64 3 -; OPT-NEXT: [[TMP11:%.*]] = extractelement <7 x double> [[Y]], i64 4 -; OPT-NEXT: [[TMP12:%.*]] = extractelement <7 x double> [[Y]], i64 5 -; OPT-NEXT: [[TMP13:%.*]] = extractelement <7 x double> [[Y]], i64 6 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <7 x double> [[Y]], i64 0 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <7 x double> [[Y]], i64 1 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE5:%.*]] = extractelement <7 x double> [[Y]], i64 2 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE7:%.*]] = extractelement <7 x double> [[Y]], i64 3 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE9:%.*]] = extractelement <7 x double> [[Y]], i64 4 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE11:%.*]] = extractelement <7 x double> [[Y]], i64 5 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE13:%.*]] = extractelement <7 x double> [[Y]], i64 6 ; OPT-NEXT: br label [[FINALLY]] ; OPT: finally: -; OPT-NEXT: [[TMP14:%.*]] = phi double [ [[TMP0]], [[THEN]] ], [ [[TMP7]], [[ELSE]] ] -; OPT-NEXT: [[TMP15:%.*]] = phi double [ [[TMP1]], [[THEN]] ], 
[ [[TMP8]], [[ELSE]] ] -; OPT-NEXT: [[TMP16:%.*]] = phi double [ [[TMP2]], [[THEN]] ], [ [[TMP9]], [[ELSE]] ] -; OPT-NEXT: [[TMP17:%.*]] = phi double [ [[TMP3]], [[THEN]] ], [ [[TMP10]], [[ELSE]] ] -; OPT-NEXT: [[TMP18:%.*]] = phi double [ [[TMP4]], [[THEN]] ], [ [[TMP11]], [[ELSE]] ] -; OPT-NEXT: [[TMP19:%.*]] = phi double [ [[TMP5]], [[THEN]] ], [ [[TMP12]], [[ELSE]] ] -; OPT-NEXT: [[TMP20:%.*]] = phi double [ [[TMP6]], [[THEN]] ], [ [[TMP13]], [[ELSE]] ] -; OPT-NEXT: [[TMP21:%.*]] = insertelement <7 x double> poison, double [[TMP14]], i64 0 -; OPT-NEXT: [[TMP22:%.*]] = insertelement <7 x double> [[TMP21]], double [[TMP15]], i64 1 -; OPT-NEXT: [[TMP23:%.*]] = insertelement <7 x double> [[TMP22]], double [[TMP16]], i64 2 -; OPT-NEXT: [[TMP24:%.*]] = insertelement <7 x double> [[TMP23]], double [[TMP17]], i64 3 -; OPT-NEXT: [[TMP25:%.*]] = insertelement <7 x double> [[TMP24]], double [[TMP18]], i64 4 -; OPT-NEXT: [[TMP26:%.*]] = insertelement <7 x double> [[TMP25]], double [[TMP19]], i64 5 -; OPT-NEXT: [[TMP27:%.*]] = insertelement <7 x double> [[TMP26]], double [[TMP20]], i64 6 -; OPT-NEXT: store <7 x double> [[TMP27]], ptr [[OUT:%.*]], align 1 +; OPT-NEXT: [[TMP0:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE1]], [[ELSE]] ] +; OPT-NEXT: [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE3]], [[ELSE]] ] +; OPT-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE5]], [[ELSE]] ] +; OPT-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE6]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE7]], [[ELSE]] ] +; OPT-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE8]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE9]], [[ELSE]] ] +; OPT-NEXT: [[TMP5:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE10]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE11]], [[ELSE]] ] +; OPT-NEXT: [[TMP6:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE12]], [[THEN]] ], [ 
[[LARGEPHI_EXTRACTSLICE13]], [[ELSE]] ] +; OPT-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <7 x double> poison, double [[TMP0]], i64 0 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <7 x double> [[LARGEPHI_INSERTSLICE0]], double [[TMP1]], i64 1 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <7 x double> [[LARGEPHI_INSERTSLICE1]], double [[TMP2]], i64 2 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE3:%.*]] = insertelement <7 x double> [[LARGEPHI_INSERTSLICE2]], double [[TMP3]], i64 3 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE4:%.*]] = insertelement <7 x double> [[LARGEPHI_INSERTSLICE3]], double [[TMP4]], i64 4 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE5:%.*]] = insertelement <7 x double> [[LARGEPHI_INSERTSLICE4]], double [[TMP5]], i64 5 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE6:%.*]] = insertelement <7 x double> [[LARGEPHI_INSERTSLICE5]], double [[TMP6]], i64 6 +; OPT-NEXT: store <7 x double> [[LARGEPHI_INSERTSLICE6]], ptr [[OUT:%.*]], align 1 ; OPT-NEXT: ret void ; ; NOOPT-LABEL: @phi_v7f64( @@ -141,56 +141,56 @@ ; OPT-NEXT: br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]] ; OPT: then: ; OPT-NEXT: [[X:%.*]] = insertelement <11 x double> [[IN:%.*]], double 3.140000e+00, i32 3 -; OPT-NEXT: [[TMP0:%.*]] = extractelement <11 x double> [[X]], i64 0 -; OPT-NEXT: [[TMP1:%.*]] = extractelement <11 x double> [[X]], i64 1 -; OPT-NEXT: [[TMP2:%.*]] = extractelement <11 x double> [[X]], i64 2 -; OPT-NEXT: [[TMP3:%.*]] = extractelement <11 x double> [[X]], i64 3 -; OPT-NEXT: [[TMP4:%.*]] = extractelement <11 x double> [[X]], i64 4 -; OPT-NEXT: [[TMP5:%.*]] = extractelement <11 x double> [[X]], i64 5 -; OPT-NEXT: [[TMP6:%.*]] = extractelement <11 x double> [[X]], i64 6 -; OPT-NEXT: [[TMP7:%.*]] = extractelement <11 x double> [[X]], i64 7 -; OPT-NEXT: [[TMP8:%.*]] = extractelement <11 x double> [[X]], i64 8 -; OPT-NEXT: [[TMP9:%.*]] = extractelement <11 x double> [[X]], i64 9 -; OPT-NEXT: [[TMP10:%.*]] = extractelement <11 x double> [[X]], i64 10 +; OPT-NEXT: 
[[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <11 x double> [[X]], i64 0 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <11 x double> [[X]], i64 1 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <11 x double> [[X]], i64 2 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE6:%.*]] = extractelement <11 x double> [[X]], i64 3 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE8:%.*]] = extractelement <11 x double> [[X]], i64 4 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE10:%.*]] = extractelement <11 x double> [[X]], i64 5 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE12:%.*]] = extractelement <11 x double> [[X]], i64 6 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE14:%.*]] = extractelement <11 x double> [[X]], i64 7 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE16:%.*]] = extractelement <11 x double> [[X]], i64 8 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE18:%.*]] = extractelement <11 x double> [[X]], i64 9 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE20:%.*]] = extractelement <11 x double> [[X]], i64 10 ; OPT-NEXT: br label [[FINALLY:%.*]] ; OPT: else: ; OPT-NEXT: [[Y:%.*]] = insertelement <11 x double> [[IN]], double 9.140000e+00, i32 6 -; OPT-NEXT: [[TMP11:%.*]] = extractelement <11 x double> [[Y]], i64 0 -; OPT-NEXT: [[TMP12:%.*]] = extractelement <11 x double> [[Y]], i64 1 -; OPT-NEXT: [[TMP13:%.*]] = extractelement <11 x double> [[Y]], i64 2 -; OPT-NEXT: [[TMP14:%.*]] = extractelement <11 x double> [[Y]], i64 3 -; OPT-NEXT: [[TMP15:%.*]] = extractelement <11 x double> [[Y]], i64 4 -; OPT-NEXT: [[TMP16:%.*]] = extractelement <11 x double> [[Y]], i64 5 -; OPT-NEXT: [[TMP17:%.*]] = extractelement <11 x double> [[Y]], i64 6 -; OPT-NEXT: [[TMP18:%.*]] = extractelement <11 x double> [[Y]], i64 7 -; OPT-NEXT: [[TMP19:%.*]] = extractelement <11 x double> [[Y]], i64 8 -; OPT-NEXT: [[TMP20:%.*]] = extractelement <11 x double> [[Y]], i64 9 -; OPT-NEXT: [[TMP21:%.*]] = extractelement <11 x double> [[Y]], i64 10 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <11 x double> [[Y]], i64 0 +; OPT-NEXT: 
[[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <11 x double> [[Y]], i64 1 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE5:%.*]] = extractelement <11 x double> [[Y]], i64 2 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE7:%.*]] = extractelement <11 x double> [[Y]], i64 3 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE9:%.*]] = extractelement <11 x double> [[Y]], i64 4 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE11:%.*]] = extractelement <11 x double> [[Y]], i64 5 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE13:%.*]] = extractelement <11 x double> [[Y]], i64 6 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE15:%.*]] = extractelement <11 x double> [[Y]], i64 7 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE17:%.*]] = extractelement <11 x double> [[Y]], i64 8 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE19:%.*]] = extractelement <11 x double> [[Y]], i64 9 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE21:%.*]] = extractelement <11 x double> [[Y]], i64 10 ; OPT-NEXT: br label [[FINALLY]] ; OPT: finally: -; OPT-NEXT: [[TMP22:%.*]] = phi double [ [[TMP0]], [[THEN]] ], [ [[TMP11]], [[ELSE]] ] -; OPT-NEXT: [[TMP23:%.*]] = phi double [ [[TMP1]], [[THEN]] ], [ [[TMP12]], [[ELSE]] ] -; OPT-NEXT: [[TMP24:%.*]] = phi double [ [[TMP2]], [[THEN]] ], [ [[TMP13]], [[ELSE]] ] -; OPT-NEXT: [[TMP25:%.*]] = phi double [ [[TMP3]], [[THEN]] ], [ [[TMP14]], [[ELSE]] ] -; OPT-NEXT: [[TMP26:%.*]] = phi double [ [[TMP4]], [[THEN]] ], [ [[TMP15]], [[ELSE]] ] -; OPT-NEXT: [[TMP27:%.*]] = phi double [ [[TMP5]], [[THEN]] ], [ [[TMP16]], [[ELSE]] ] -; OPT-NEXT: [[TMP28:%.*]] = phi double [ [[TMP6]], [[THEN]] ], [ [[TMP17]], [[ELSE]] ] -; OPT-NEXT: [[TMP29:%.*]] = phi double [ [[TMP7]], [[THEN]] ], [ [[TMP18]], [[ELSE]] ] -; OPT-NEXT: [[TMP30:%.*]] = phi double [ [[TMP8]], [[THEN]] ], [ [[TMP19]], [[ELSE]] ] -; OPT-NEXT: [[TMP31:%.*]] = phi double [ [[TMP9]], [[THEN]] ], [ [[TMP20]], [[ELSE]] ] -; OPT-NEXT: [[TMP32:%.*]] = phi double [ [[TMP10]], [[THEN]] ], [ [[TMP21]], [[ELSE]] ] -; OPT-NEXT: [[TMP33:%.*]] = insertelement <11 x double> poison, double [[TMP22]], i64 0 -; OPT-NEXT: 
[[TMP34:%.*]] = insertelement <11 x double> [[TMP33]], double [[TMP23]], i64 1 -; OPT-NEXT: [[TMP35:%.*]] = insertelement <11 x double> [[TMP34]], double [[TMP24]], i64 2 -; OPT-NEXT: [[TMP36:%.*]] = insertelement <11 x double> [[TMP35]], double [[TMP25]], i64 3 -; OPT-NEXT: [[TMP37:%.*]] = insertelement <11 x double> [[TMP36]], double [[TMP26]], i64 4 -; OPT-NEXT: [[TMP38:%.*]] = insertelement <11 x double> [[TMP37]], double [[TMP27]], i64 5 -; OPT-NEXT: [[TMP39:%.*]] = insertelement <11 x double> [[TMP38]], double [[TMP28]], i64 6 -; OPT-NEXT: [[TMP40:%.*]] = insertelement <11 x double> [[TMP39]], double [[TMP29]], i64 7 -; OPT-NEXT: [[TMP41:%.*]] = insertelement <11 x double> [[TMP40]], double [[TMP30]], i64 8 -; OPT-NEXT: [[TMP42:%.*]] = insertelement <11 x double> [[TMP41]], double [[TMP31]], i64 9 -; OPT-NEXT: [[TMP43:%.*]] = insertelement <11 x double> [[TMP42]], double [[TMP32]], i64 10 -; OPT-NEXT: store <11 x double> [[TMP43]], ptr [[OUT:%.*]], align 1 +; OPT-NEXT: [[TMP0:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE1]], [[ELSE]] ] +; OPT-NEXT: [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE3]], [[ELSE]] ] +; OPT-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE5]], [[ELSE]] ] +; OPT-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE6]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE7]], [[ELSE]] ] +; OPT-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE8]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE9]], [[ELSE]] ] +; OPT-NEXT: [[TMP5:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE10]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE11]], [[ELSE]] ] +; OPT-NEXT: [[TMP6:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE12]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE13]], [[ELSE]] ] +; OPT-NEXT: [[TMP7:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE14]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE15]], [[ELSE]] ] +; OPT-NEXT: [[TMP8:%.*]] = phi double [ 
[[LARGEPHI_EXTRACTSLICE16]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE17]], [[ELSE]] ] +; OPT-NEXT: [[TMP9:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE18]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE19]], [[ELSE]] ] +; OPT-NEXT: [[TMP10:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE20]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE21]], [[ELSE]] ] +; OPT-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <11 x double> poison, double [[TMP0]], i64 0 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <11 x double> [[LARGEPHI_INSERTSLICE0]], double [[TMP1]], i64 1 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <11 x double> [[LARGEPHI_INSERTSLICE1]], double [[TMP2]], i64 2 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE3:%.*]] = insertelement <11 x double> [[LARGEPHI_INSERTSLICE2]], double [[TMP3]], i64 3 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE4:%.*]] = insertelement <11 x double> [[LARGEPHI_INSERTSLICE3]], double [[TMP4]], i64 4 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE5:%.*]] = insertelement <11 x double> [[LARGEPHI_INSERTSLICE4]], double [[TMP5]], i64 5 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE6:%.*]] = insertelement <11 x double> [[LARGEPHI_INSERTSLICE5]], double [[TMP6]], i64 6 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE7:%.*]] = insertelement <11 x double> [[LARGEPHI_INSERTSLICE6]], double [[TMP7]], i64 7 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE8:%.*]] = insertelement <11 x double> [[LARGEPHI_INSERTSLICE7]], double [[TMP8]], i64 8 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE9:%.*]] = insertelement <11 x double> [[LARGEPHI_INSERTSLICE8]], double [[TMP9]], i64 9 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE10:%.*]] = insertelement <11 x double> [[LARGEPHI_INSERTSLICE9]], double [[TMP10]], i64 10 +; OPT-NEXT: store <11 x double> [[LARGEPHI_INSERTSLICE10]], ptr [[OUT:%.*]], align 1 ; OPT-NEXT: ret void ; ; NOOPT-LABEL: @phi_v11f64( @@ -227,42 +227,42 @@ ; OPT-NEXT: br i1 [[COND:%.*]], label [[THEN:%.*]], label [[FINALLY:%.*]] ; OPT: then: ; OPT-NEXT: [[X:%.*]] = insertelement <11 x double> [[IN:%.*]], double 3.140000e+00, 
i32 3 -; OPT-NEXT: [[TMP0:%.*]] = extractelement <11 x double> [[X]], i64 0 -; OPT-NEXT: [[TMP1:%.*]] = extractelement <11 x double> [[X]], i64 1 -; OPT-NEXT: [[TMP2:%.*]] = extractelement <11 x double> [[X]], i64 2 -; OPT-NEXT: [[TMP3:%.*]] = extractelement <11 x double> [[X]], i64 3 -; OPT-NEXT: [[TMP4:%.*]] = extractelement <11 x double> [[X]], i64 4 -; OPT-NEXT: [[TMP5:%.*]] = extractelement <11 x double> [[X]], i64 5 -; OPT-NEXT: [[TMP6:%.*]] = extractelement <11 x double> [[X]], i64 6 -; OPT-NEXT: [[TMP7:%.*]] = extractelement <11 x double> [[X]], i64 7 -; OPT-NEXT: [[TMP8:%.*]] = extractelement <11 x double> [[X]], i64 8 -; OPT-NEXT: [[TMP9:%.*]] = extractelement <11 x double> [[X]], i64 9 -; OPT-NEXT: [[TMP10:%.*]] = extractelement <11 x double> [[X]], i64 10 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <11 x double> [[X]], i64 0 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <11 x double> [[X]], i64 1 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <11 x double> [[X]], i64 2 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE6:%.*]] = extractelement <11 x double> [[X]], i64 3 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE8:%.*]] = extractelement <11 x double> [[X]], i64 4 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE10:%.*]] = extractelement <11 x double> [[X]], i64 5 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE12:%.*]] = extractelement <11 x double> [[X]], i64 6 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE14:%.*]] = extractelement <11 x double> [[X]], i64 7 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE16:%.*]] = extractelement <11 x double> [[X]], i64 8 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE18:%.*]] = extractelement <11 x double> [[X]], i64 9 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE20:%.*]] = extractelement <11 x double> [[X]], i64 10 ; OPT-NEXT: br label [[FINALLY]] ; OPT: finally: -; OPT-NEXT: [[TMP11:%.*]] = phi double [ [[TMP0]], [[THEN]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] -; OPT-NEXT: [[TMP12:%.*]] = phi double [ [[TMP1]], [[THEN]] ], [ 0.000000e+00, [[ENTRY]] ] -; 
OPT-NEXT: [[TMP13:%.*]] = phi double [ [[TMP2]], [[THEN]] ], [ 0.000000e+00, [[ENTRY]] ] -; OPT-NEXT: [[TMP14:%.*]] = phi double [ [[TMP3]], [[THEN]] ], [ 0.000000e+00, [[ENTRY]] ] -; OPT-NEXT: [[TMP15:%.*]] = phi double [ [[TMP4]], [[THEN]] ], [ 0.000000e+00, [[ENTRY]] ] -; OPT-NEXT: [[TMP16:%.*]] = phi double [ [[TMP5]], [[THEN]] ], [ 0.000000e+00, [[ENTRY]] ] -; OPT-NEXT: [[TMP17:%.*]] = phi double [ [[TMP6]], [[THEN]] ], [ 0.000000e+00, [[ENTRY]] ] -; OPT-NEXT: [[TMP18:%.*]] = phi double [ [[TMP7]], [[THEN]] ], [ 0.000000e+00, [[ENTRY]] ] -; OPT-NEXT: [[TMP19:%.*]] = phi double [ [[TMP8]], [[THEN]] ], [ 0.000000e+00, [[ENTRY]] ] -; OPT-NEXT: [[TMP20:%.*]] = phi double [ [[TMP9]], [[THEN]] ], [ 0.000000e+00, [[ENTRY]] ] -; OPT-NEXT: [[TMP21:%.*]] = phi double [ [[TMP10]], [[THEN]] ], [ 0.000000e+00, [[ENTRY]] ] -; OPT-NEXT: [[TMP22:%.*]] = insertelement <11 x double> poison, double [[TMP11]], i64 0 -; OPT-NEXT: [[TMP23:%.*]] = insertelement <11 x double> [[TMP22]], double [[TMP12]], i64 1 -; OPT-NEXT: [[TMP24:%.*]] = insertelement <11 x double> [[TMP23]], double [[TMP13]], i64 2 -; OPT-NEXT: [[TMP25:%.*]] = insertelement <11 x double> [[TMP24]], double [[TMP14]], i64 3 -; OPT-NEXT: [[TMP26:%.*]] = insertelement <11 x double> [[TMP25]], double [[TMP15]], i64 4 -; OPT-NEXT: [[TMP27:%.*]] = insertelement <11 x double> [[TMP26]], double [[TMP16]], i64 5 -; OPT-NEXT: [[TMP28:%.*]] = insertelement <11 x double> [[TMP27]], double [[TMP17]], i64 6 -; OPT-NEXT: [[TMP29:%.*]] = insertelement <11 x double> [[TMP28]], double [[TMP18]], i64 7 -; OPT-NEXT: [[TMP30:%.*]] = insertelement <11 x double> [[TMP29]], double [[TMP19]], i64 8 -; OPT-NEXT: [[TMP31:%.*]] = insertelement <11 x double> [[TMP30]], double [[TMP20]], i64 9 -; OPT-NEXT: [[TMP32:%.*]] = insertelement <11 x double> [[TMP31]], double [[TMP21]], i64 10 -; OPT-NEXT: store <11 x double> [[TMP32]], ptr [[OUT:%.*]], align 1 +; OPT-NEXT: [[TMP0:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ 
0.000000e+00, [[ENTRY:%.*]] ] +; OPT-NEXT: [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ 0.000000e+00, [[ENTRY]] ] +; OPT-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ 0.000000e+00, [[ENTRY]] ] +; OPT-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE6]], [[THEN]] ], [ 0.000000e+00, [[ENTRY]] ] +; OPT-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE8]], [[THEN]] ], [ 0.000000e+00, [[ENTRY]] ] +; OPT-NEXT: [[TMP5:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE10]], [[THEN]] ], [ 0.000000e+00, [[ENTRY]] ] +; OPT-NEXT: [[TMP6:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE12]], [[THEN]] ], [ 0.000000e+00, [[ENTRY]] ] +; OPT-NEXT: [[TMP7:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE14]], [[THEN]] ], [ 0.000000e+00, [[ENTRY]] ] +; OPT-NEXT: [[TMP8:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE16]], [[THEN]] ], [ 0.000000e+00, [[ENTRY]] ] +; OPT-NEXT: [[TMP9:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE18]], [[THEN]] ], [ 0.000000e+00, [[ENTRY]] ] +; OPT-NEXT: [[TMP10:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE20]], [[THEN]] ], [ 0.000000e+00, [[ENTRY]] ] +; OPT-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <11 x double> poison, double [[TMP0]], i64 0 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <11 x double> [[LARGEPHI_INSERTSLICE0]], double [[TMP1]], i64 1 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <11 x double> [[LARGEPHI_INSERTSLICE1]], double [[TMP2]], i64 2 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE3:%.*]] = insertelement <11 x double> [[LARGEPHI_INSERTSLICE2]], double [[TMP3]], i64 3 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE4:%.*]] = insertelement <11 x double> [[LARGEPHI_INSERTSLICE3]], double [[TMP4]], i64 4 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE5:%.*]] = insertelement <11 x double> [[LARGEPHI_INSERTSLICE4]], double [[TMP5]], i64 5 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE6:%.*]] = insertelement <11 x double> [[LARGEPHI_INSERTSLICE5]], double [[TMP6]], i64 6 +; OPT-NEXT: 
[[LARGEPHI_INSERTSLICE7:%.*]] = insertelement <11 x double> [[LARGEPHI_INSERTSLICE6]], double [[TMP7]], i64 7 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE8:%.*]] = insertelement <11 x double> [[LARGEPHI_INSERTSLICE7]], double [[TMP8]], i64 8 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE9:%.*]] = insertelement <11 x double> [[LARGEPHI_INSERTSLICE8]], double [[TMP9]], i64 9 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE10:%.*]] = insertelement <11 x double> [[LARGEPHI_INSERTSLICE9]], double [[TMP10]], i64 10 +; OPT-NEXT: store <11 x double> [[LARGEPHI_INSERTSLICE10]], ptr [[OUT:%.*]], align 1 ; OPT-NEXT: ret void ; ; NOOPT-LABEL: @phi_v11f64_cst( @@ -293,72 +293,72 @@ ; OPT-NEXT: br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]] ; OPT: then: ; OPT-NEXT: [[X:%.*]] = insertelement <15 x i64> [[IN:%.*]], i64 42, i32 3 -; OPT-NEXT: [[TMP0:%.*]] = extractelement <15 x i64> [[X]], i64 0 -; OPT-NEXT: [[TMP1:%.*]] = extractelement <15 x i64> [[X]], i64 1 -; OPT-NEXT: [[TMP2:%.*]] = extractelement <15 x i64> [[X]], i64 2 -; OPT-NEXT: [[TMP3:%.*]] = extractelement <15 x i64> [[X]], i64 3 -; OPT-NEXT: [[TMP4:%.*]] = extractelement <15 x i64> [[X]], i64 4 -; OPT-NEXT: [[TMP5:%.*]] = extractelement <15 x i64> [[X]], i64 5 -; OPT-NEXT: [[TMP6:%.*]] = extractelement <15 x i64> [[X]], i64 6 -; OPT-NEXT: [[TMP7:%.*]] = extractelement <15 x i64> [[X]], i64 7 -; OPT-NEXT: [[TMP8:%.*]] = extractelement <15 x i64> [[X]], i64 8 -; OPT-NEXT: [[TMP9:%.*]] = extractelement <15 x i64> [[X]], i64 9 -; OPT-NEXT: [[TMP10:%.*]] = extractelement <15 x i64> [[X]], i64 10 -; OPT-NEXT: [[TMP11:%.*]] = extractelement <15 x i64> [[X]], i64 11 -; OPT-NEXT: [[TMP12:%.*]] = extractelement <15 x i64> [[X]], i64 12 -; OPT-NEXT: [[TMP13:%.*]] = extractelement <15 x i64> [[X]], i64 13 -; OPT-NEXT: [[TMP14:%.*]] = extractelement <15 x i64> [[X]], i64 14 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <15 x i64> [[X]], i64 0 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <15 x i64> [[X]], i64 1 +; 
OPT-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <15 x i64> [[X]], i64 2 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE6:%.*]] = extractelement <15 x i64> [[X]], i64 3 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE8:%.*]] = extractelement <15 x i64> [[X]], i64 4 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE10:%.*]] = extractelement <15 x i64> [[X]], i64 5 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE12:%.*]] = extractelement <15 x i64> [[X]], i64 6 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE14:%.*]] = extractelement <15 x i64> [[X]], i64 7 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE16:%.*]] = extractelement <15 x i64> [[X]], i64 8 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE18:%.*]] = extractelement <15 x i64> [[X]], i64 9 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE20:%.*]] = extractelement <15 x i64> [[X]], i64 10 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE22:%.*]] = extractelement <15 x i64> [[X]], i64 11 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE24:%.*]] = extractelement <15 x i64> [[X]], i64 12 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE26:%.*]] = extractelement <15 x i64> [[X]], i64 13 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE28:%.*]] = extractelement <15 x i64> [[X]], i64 14 ; OPT-NEXT: br label [[FINALLY:%.*]] ; OPT: else: ; OPT-NEXT: [[Y:%.*]] = insertelement <15 x i64> [[IN]], i64 64, i32 6 -; OPT-NEXT: [[TMP15:%.*]] = extractelement <15 x i64> [[Y]], i64 0 -; OPT-NEXT: [[TMP16:%.*]] = extractelement <15 x i64> [[Y]], i64 1 -; OPT-NEXT: [[TMP17:%.*]] = extractelement <15 x i64> [[Y]], i64 2 -; OPT-NEXT: [[TMP18:%.*]] = extractelement <15 x i64> [[Y]], i64 3 -; OPT-NEXT: [[TMP19:%.*]] = extractelement <15 x i64> [[Y]], i64 4 -; OPT-NEXT: [[TMP20:%.*]] = extractelement <15 x i64> [[Y]], i64 5 -; OPT-NEXT: [[TMP21:%.*]] = extractelement <15 x i64> [[Y]], i64 6 -; OPT-NEXT: [[TMP22:%.*]] = extractelement <15 x i64> [[Y]], i64 7 -; OPT-NEXT: [[TMP23:%.*]] = extractelement <15 x i64> [[Y]], i64 8 -; OPT-NEXT: [[TMP24:%.*]] = extractelement <15 x i64> [[Y]], i64 9 -; OPT-NEXT: [[TMP25:%.*]] = extractelement <15 x i64> [[Y]], i64 10 -; OPT-NEXT: 
[[TMP26:%.*]] = extractelement <15 x i64> [[Y]], i64 11 -; OPT-NEXT: [[TMP27:%.*]] = extractelement <15 x i64> [[Y]], i64 12 -; OPT-NEXT: [[TMP28:%.*]] = extractelement <15 x i64> [[Y]], i64 13 -; OPT-NEXT: [[TMP29:%.*]] = extractelement <15 x i64> [[Y]], i64 14 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <15 x i64> [[Y]], i64 0 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <15 x i64> [[Y]], i64 1 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE5:%.*]] = extractelement <15 x i64> [[Y]], i64 2 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE7:%.*]] = extractelement <15 x i64> [[Y]], i64 3 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE9:%.*]] = extractelement <15 x i64> [[Y]], i64 4 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE11:%.*]] = extractelement <15 x i64> [[Y]], i64 5 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE13:%.*]] = extractelement <15 x i64> [[Y]], i64 6 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE15:%.*]] = extractelement <15 x i64> [[Y]], i64 7 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE17:%.*]] = extractelement <15 x i64> [[Y]], i64 8 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE19:%.*]] = extractelement <15 x i64> [[Y]], i64 9 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE21:%.*]] = extractelement <15 x i64> [[Y]], i64 10 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE23:%.*]] = extractelement <15 x i64> [[Y]], i64 11 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE25:%.*]] = extractelement <15 x i64> [[Y]], i64 12 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE27:%.*]] = extractelement <15 x i64> [[Y]], i64 13 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE29:%.*]] = extractelement <15 x i64> [[Y]], i64 14 ; OPT-NEXT: br label [[FINALLY]] ; OPT: finally: -; OPT-NEXT: [[TMP30:%.*]] = phi i64 [ [[TMP0]], [[THEN]] ], [ [[TMP15]], [[ELSE]] ] -; OPT-NEXT: [[TMP31:%.*]] = phi i64 [ [[TMP1]], [[THEN]] ], [ [[TMP16]], [[ELSE]] ] -; OPT-NEXT: [[TMP32:%.*]] = phi i64 [ [[TMP2]], [[THEN]] ], [ [[TMP17]], [[ELSE]] ] -; OPT-NEXT: [[TMP33:%.*]] = phi i64 [ [[TMP3]], [[THEN]] ], [ [[TMP18]], [[ELSE]] ] -; OPT-NEXT: [[TMP34:%.*]] = phi i64 [ [[TMP4]], [[THEN]] 
], [ [[TMP19]], [[ELSE]] ] -; OPT-NEXT: [[TMP35:%.*]] = phi i64 [ [[TMP5]], [[THEN]] ], [ [[TMP20]], [[ELSE]] ] -; OPT-NEXT: [[TMP36:%.*]] = phi i64 [ [[TMP6]], [[THEN]] ], [ [[TMP21]], [[ELSE]] ] -; OPT-NEXT: [[TMP37:%.*]] = phi i64 [ [[TMP7]], [[THEN]] ], [ [[TMP22]], [[ELSE]] ] -; OPT-NEXT: [[TMP38:%.*]] = phi i64 [ [[TMP8]], [[THEN]] ], [ [[TMP23]], [[ELSE]] ] -; OPT-NEXT: [[TMP39:%.*]] = phi i64 [ [[TMP9]], [[THEN]] ], [ [[TMP24]], [[ELSE]] ] -; OPT-NEXT: [[TMP40:%.*]] = phi i64 [ [[TMP10]], [[THEN]] ], [ [[TMP25]], [[ELSE]] ] -; OPT-NEXT: [[TMP41:%.*]] = phi i64 [ [[TMP11]], [[THEN]] ], [ [[TMP26]], [[ELSE]] ] -; OPT-NEXT: [[TMP42:%.*]] = phi i64 [ [[TMP12]], [[THEN]] ], [ [[TMP27]], [[ELSE]] ] -; OPT-NEXT: [[TMP43:%.*]] = phi i64 [ [[TMP13]], [[THEN]] ], [ [[TMP28]], [[ELSE]] ] -; OPT-NEXT: [[TMP44:%.*]] = phi i64 [ [[TMP14]], [[THEN]] ], [ [[TMP29]], [[ELSE]] ] -; OPT-NEXT: [[TMP45:%.*]] = insertelement <15 x i64> poison, i64 [[TMP30]], i64 0 -; OPT-NEXT: [[TMP46:%.*]] = insertelement <15 x i64> [[TMP45]], i64 [[TMP31]], i64 1 -; OPT-NEXT: [[TMP47:%.*]] = insertelement <15 x i64> [[TMP46]], i64 [[TMP32]], i64 2 -; OPT-NEXT: [[TMP48:%.*]] = insertelement <15 x i64> [[TMP47]], i64 [[TMP33]], i64 3 -; OPT-NEXT: [[TMP49:%.*]] = insertelement <15 x i64> [[TMP48]], i64 [[TMP34]], i64 4 -; OPT-NEXT: [[TMP50:%.*]] = insertelement <15 x i64> [[TMP49]], i64 [[TMP35]], i64 5 -; OPT-NEXT: [[TMP51:%.*]] = insertelement <15 x i64> [[TMP50]], i64 [[TMP36]], i64 6 -; OPT-NEXT: [[TMP52:%.*]] = insertelement <15 x i64> [[TMP51]], i64 [[TMP37]], i64 7 -; OPT-NEXT: [[TMP53:%.*]] = insertelement <15 x i64> [[TMP52]], i64 [[TMP38]], i64 8 -; OPT-NEXT: [[TMP54:%.*]] = insertelement <15 x i64> [[TMP53]], i64 [[TMP39]], i64 9 -; OPT-NEXT: [[TMP55:%.*]] = insertelement <15 x i64> [[TMP54]], i64 [[TMP40]], i64 10 -; OPT-NEXT: [[TMP56:%.*]] = insertelement <15 x i64> [[TMP55]], i64 [[TMP41]], i64 11 -; OPT-NEXT: [[TMP57:%.*]] = insertelement <15 x i64> [[TMP56]], i64 [[TMP42]], i64 12 
-; OPT-NEXT: [[TMP58:%.*]] = insertelement <15 x i64> [[TMP57]], i64 [[TMP43]], i64 13 -; OPT-NEXT: [[TMP59:%.*]] = insertelement <15 x i64> [[TMP58]], i64 [[TMP44]], i64 14 -; OPT-NEXT: store <15 x i64> [[TMP59]], ptr [[OUT:%.*]], align 1 +; OPT-NEXT: [[TMP0:%.*]] = phi i64 [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE1]], [[ELSE]] ] +; OPT-NEXT: [[TMP1:%.*]] = phi i64 [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE3]], [[ELSE]] ] +; OPT-NEXT: [[TMP2:%.*]] = phi i64 [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE5]], [[ELSE]] ] +; OPT-NEXT: [[TMP3:%.*]] = phi i64 [ [[LARGEPHI_EXTRACTSLICE6]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE7]], [[ELSE]] ] +; OPT-NEXT: [[TMP4:%.*]] = phi i64 [ [[LARGEPHI_EXTRACTSLICE8]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE9]], [[ELSE]] ] +; OPT-NEXT: [[TMP5:%.*]] = phi i64 [ [[LARGEPHI_EXTRACTSLICE10]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE11]], [[ELSE]] ] +; OPT-NEXT: [[TMP6:%.*]] = phi i64 [ [[LARGEPHI_EXTRACTSLICE12]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE13]], [[ELSE]] ] +; OPT-NEXT: [[TMP7:%.*]] = phi i64 [ [[LARGEPHI_EXTRACTSLICE14]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE15]], [[ELSE]] ] +; OPT-NEXT: [[TMP8:%.*]] = phi i64 [ [[LARGEPHI_EXTRACTSLICE16]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE17]], [[ELSE]] ] +; OPT-NEXT: [[TMP9:%.*]] = phi i64 [ [[LARGEPHI_EXTRACTSLICE18]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE19]], [[ELSE]] ] +; OPT-NEXT: [[TMP10:%.*]] = phi i64 [ [[LARGEPHI_EXTRACTSLICE20]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE21]], [[ELSE]] ] +; OPT-NEXT: [[TMP11:%.*]] = phi i64 [ [[LARGEPHI_EXTRACTSLICE22]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE23]], [[ELSE]] ] +; OPT-NEXT: [[TMP12:%.*]] = phi i64 [ [[LARGEPHI_EXTRACTSLICE24]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE25]], [[ELSE]] ] +; OPT-NEXT: [[TMP13:%.*]] = phi i64 [ [[LARGEPHI_EXTRACTSLICE26]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE27]], [[ELSE]] ] +; OPT-NEXT: [[TMP14:%.*]] = phi i64 [ [[LARGEPHI_EXTRACTSLICE28]], 
[[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE29]], [[ELSE]] ] +; OPT-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <15 x i64> poison, i64 [[TMP0]], i64 0 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <15 x i64> [[LARGEPHI_INSERTSLICE0]], i64 [[TMP1]], i64 1 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <15 x i64> [[LARGEPHI_INSERTSLICE1]], i64 [[TMP2]], i64 2 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE3:%.*]] = insertelement <15 x i64> [[LARGEPHI_INSERTSLICE2]], i64 [[TMP3]], i64 3 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE4:%.*]] = insertelement <15 x i64> [[LARGEPHI_INSERTSLICE3]], i64 [[TMP4]], i64 4 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE5:%.*]] = insertelement <15 x i64> [[LARGEPHI_INSERTSLICE4]], i64 [[TMP5]], i64 5 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE6:%.*]] = insertelement <15 x i64> [[LARGEPHI_INSERTSLICE5]], i64 [[TMP6]], i64 6 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE7:%.*]] = insertelement <15 x i64> [[LARGEPHI_INSERTSLICE6]], i64 [[TMP7]], i64 7 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE8:%.*]] = insertelement <15 x i64> [[LARGEPHI_INSERTSLICE7]], i64 [[TMP8]], i64 8 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE9:%.*]] = insertelement <15 x i64> [[LARGEPHI_INSERTSLICE8]], i64 [[TMP9]], i64 9 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE10:%.*]] = insertelement <15 x i64> [[LARGEPHI_INSERTSLICE9]], i64 [[TMP10]], i64 10 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE11:%.*]] = insertelement <15 x i64> [[LARGEPHI_INSERTSLICE10]], i64 [[TMP11]], i64 11 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE12:%.*]] = insertelement <15 x i64> [[LARGEPHI_INSERTSLICE11]], i64 [[TMP12]], i64 12 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE13:%.*]] = insertelement <15 x i64> [[LARGEPHI_INSERTSLICE12]], i64 [[TMP13]], i64 13 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE14:%.*]] = insertelement <15 x i64> [[LARGEPHI_INSERTSLICE13]], i64 [[TMP14]], i64 14 +; OPT-NEXT: store <15 x i64> [[LARGEPHI_INSERTSLICE14]], ptr [[OUT:%.*]], align 1 ; OPT-NEXT: ret void ; ; NOOPT-LABEL: @phi_v15i64( @@ -395,68 +395,68 @@ ; OPT-NEXT: br i1 [[COND:%.*]], 
label [[THEN:%.*]], label [[ELSE:%.*]] ; OPT: then: ; OPT-NEXT: [[X:%.*]] = insertelement <27 x i16> [[IN:%.*]], i16 42, i32 3 -; OPT-NEXT: [[TMP0:%.*]] = shufflevector <27 x i16> [[X]], <27 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP1:%.*]] = shufflevector <27 x i16> [[X]], <27 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP2:%.*]] = shufflevector <27 x i16> [[X]], <27 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP3:%.*]] = shufflevector <27 x i16> [[X]], <27 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP4:%.*]] = shufflevector <27 x i16> [[X]], <27 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP5:%.*]] = shufflevector <27 x i16> [[X]], <27 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP6:%.*]] = shufflevector <27 x i16> [[X]], <27 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP7:%.*]] = shufflevector <27 x i16> [[X]], <27 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP8:%.*]] = shufflevector <27 x i16> [[X]], <27 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP9:%.*]] = shufflevector <27 x i16> [[X]], <27 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP10:%.*]] = shufflevector <27 x i16> [[X]], <27 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP11:%.*]] = shufflevector <27 x i16> [[X]], <27 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP12:%.*]] = shufflevector <27 x i16> [[X]], <27 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP13:%.*]] = extractelement <27 x i16> [[X]], i64 26 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE0:%.*]] = shufflevector <27 x i16> [[X]], <27 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = shufflevector <27 x i16> [[X]], <27 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = shufflevector <27 x i16> [[X]], <27 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE6:%.*]] = shufflevector <27 x i16> [[X]], <27 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE8:%.*]] = shufflevector <27 x i16> [[X]], <27 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE10:%.*]] = shufflevector <27 x i16> [[X]], <27 x i16> poison, <2 x i32> 
+; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE12:%.*]] = shufflevector <27 x i16> [[X]], <27 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE14:%.*]] = shufflevector <27 x i16> [[X]], <27 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE16:%.*]] = shufflevector <27 x i16> [[X]], <27 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE18:%.*]] = shufflevector <27 x i16> [[X]], <27 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE20:%.*]] = shufflevector <27 x i16> [[X]], <27 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE22:%.*]] = shufflevector <27 x i16> [[X]], <27 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE24:%.*]] = shufflevector <27 x i16> [[X]], <27 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE26:%.*]] = extractelement <27 x i16> [[X]], i64 26 ; OPT-NEXT: br label [[FINALLY:%.*]] ; OPT: else: ; OPT-NEXT: [[Y:%.*]] = insertelement <27 x i16> [[IN]], i16 64, i32 6 -; OPT-NEXT: [[TMP14:%.*]] = shufflevector <27 x i16> [[Y]], <27 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP15:%.*]] = shufflevector <27 x i16> [[Y]], <27 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP16:%.*]] = shufflevector <27 x i16> [[Y]], <27 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP17:%.*]] = shufflevector <27 x i16> [[Y]], <27 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP18:%.*]] = shufflevector <27 x i16> [[Y]], <27 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP19:%.*]] = shufflevector <27 x i16> [[Y]], <27 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP20:%.*]] = shufflevector <27 x i16> [[Y]], <27 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP21:%.*]] = shufflevector <27 x i16> [[Y]], <27 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP22:%.*]] = shufflevector <27 x i16> [[Y]], <27 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP23:%.*]] = shufflevector <27 x i16> [[Y]], <27 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP24:%.*]] = shufflevector <27 x i16> [[Y]], <27 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP25:%.*]] = shufflevector 
<27 x i16> [[Y]], <27 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP26:%.*]] = shufflevector <27 x i16> [[Y]], <27 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP27:%.*]] = extractelement <27 x i16> [[Y]], i64 26 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = shufflevector <27 x i16> [[Y]], <27 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = shufflevector <27 x i16> [[Y]], <27 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE5:%.*]] = shufflevector <27 x i16> [[Y]], <27 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE7:%.*]] = shufflevector <27 x i16> [[Y]], <27 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE9:%.*]] = shufflevector <27 x i16> [[Y]], <27 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE11:%.*]] = shufflevector <27 x i16> [[Y]], <27 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE13:%.*]] = shufflevector <27 x i16> [[Y]], <27 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE15:%.*]] = shufflevector <27 x i16> [[Y]], <27 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE17:%.*]] = shufflevector <27 x i16> [[Y]], <27 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE19:%.*]] = shufflevector <27 x i16> [[Y]], <27 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE21:%.*]] = shufflevector <27 x i16> [[Y]], <27 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE23:%.*]] = shufflevector <27 x i16> [[Y]], <27 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE25:%.*]] = shufflevector <27 x i16> [[Y]], <27 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE27:%.*]] = extractelement <27 x i16> [[Y]], i64 26 ; OPT-NEXT: br label [[FINALLY]] ; OPT: finally: -; OPT-NEXT: [[TMP28:%.*]] = phi <2 x i16> [ [[TMP0]], [[THEN]] ], [ [[TMP14]], [[ELSE]] ] -; OPT-NEXT: [[TMP29:%.*]] = phi <2 x i16> [ [[TMP1]], [[THEN]] ], [ [[TMP15]], [[ELSE]] ] -; OPT-NEXT: [[TMP30:%.*]] = phi <2 x i16> [ [[TMP2]], [[THEN]] ], [ 
[[TMP16]], [[ELSE]] ] -; OPT-NEXT: [[TMP31:%.*]] = phi <2 x i16> [ [[TMP3]], [[THEN]] ], [ [[TMP17]], [[ELSE]] ] -; OPT-NEXT: [[TMP32:%.*]] = phi <2 x i16> [ [[TMP4]], [[THEN]] ], [ [[TMP18]], [[ELSE]] ] -; OPT-NEXT: [[TMP33:%.*]] = phi <2 x i16> [ [[TMP5]], [[THEN]] ], [ [[TMP19]], [[ELSE]] ] -; OPT-NEXT: [[TMP34:%.*]] = phi <2 x i16> [ [[TMP6]], [[THEN]] ], [ [[TMP20]], [[ELSE]] ] -; OPT-NEXT: [[TMP35:%.*]] = phi <2 x i16> [ [[TMP7]], [[THEN]] ], [ [[TMP21]], [[ELSE]] ] -; OPT-NEXT: [[TMP36:%.*]] = phi <2 x i16> [ [[TMP8]], [[THEN]] ], [ [[TMP22]], [[ELSE]] ] -; OPT-NEXT: [[TMP37:%.*]] = phi <2 x i16> [ [[TMP9]], [[THEN]] ], [ [[TMP23]], [[ELSE]] ] -; OPT-NEXT: [[TMP38:%.*]] = phi <2 x i16> [ [[TMP10]], [[THEN]] ], [ [[TMP24]], [[ELSE]] ] -; OPT-NEXT: [[TMP39:%.*]] = phi <2 x i16> [ [[TMP11]], [[THEN]] ], [ [[TMP25]], [[ELSE]] ] -; OPT-NEXT: [[TMP40:%.*]] = phi <2 x i16> [ [[TMP12]], [[THEN]] ], [ [[TMP26]], [[ELSE]] ] -; OPT-NEXT: [[TMP41:%.*]] = phi i16 [ [[TMP13]], [[THEN]] ], [ [[TMP27]], [[ELSE]] ] -; OPT-NEXT: [[TMP42:%.*]] = call <27 x i16> @llvm.vector.insert.v27i16.v2i16(<27 x i16> poison, <2 x i16> [[TMP28]], i64 0) -; OPT-NEXT: [[TMP43:%.*]] = call <27 x i16> @llvm.vector.insert.v27i16.v2i16(<27 x i16> [[TMP42]], <2 x i16> [[TMP29]], i64 2) -; OPT-NEXT: [[TMP44:%.*]] = call <27 x i16> @llvm.vector.insert.v27i16.v2i16(<27 x i16> [[TMP43]], <2 x i16> [[TMP30]], i64 4) -; OPT-NEXT: [[TMP45:%.*]] = call <27 x i16> @llvm.vector.insert.v27i16.v2i16(<27 x i16> [[TMP44]], <2 x i16> [[TMP31]], i64 6) -; OPT-NEXT: [[TMP46:%.*]] = call <27 x i16> @llvm.vector.insert.v27i16.v2i16(<27 x i16> [[TMP45]], <2 x i16> [[TMP32]], i64 8) -; OPT-NEXT: [[TMP47:%.*]] = call <27 x i16> @llvm.vector.insert.v27i16.v2i16(<27 x i16> [[TMP46]], <2 x i16> [[TMP33]], i64 10) -; OPT-NEXT: [[TMP48:%.*]] = call <27 x i16> @llvm.vector.insert.v27i16.v2i16(<27 x i16> [[TMP47]], <2 x i16> [[TMP34]], i64 12) -; OPT-NEXT: [[TMP49:%.*]] = call <27 x i16> @llvm.vector.insert.v27i16.v2i16(<27 x 
i16> [[TMP48]], <2 x i16> [[TMP35]], i64 14) -; OPT-NEXT: [[TMP50:%.*]] = call <27 x i16> @llvm.vector.insert.v27i16.v2i16(<27 x i16> [[TMP49]], <2 x i16> [[TMP36]], i64 16) -; OPT-NEXT: [[TMP51:%.*]] = call <27 x i16> @llvm.vector.insert.v27i16.v2i16(<27 x i16> [[TMP50]], <2 x i16> [[TMP37]], i64 18) -; OPT-NEXT: [[TMP52:%.*]] = call <27 x i16> @llvm.vector.insert.v27i16.v2i16(<27 x i16> [[TMP51]], <2 x i16> [[TMP38]], i64 20) -; OPT-NEXT: [[TMP53:%.*]] = call <27 x i16> @llvm.vector.insert.v27i16.v2i16(<27 x i16> [[TMP52]], <2 x i16> [[TMP39]], i64 22) -; OPT-NEXT: [[TMP54:%.*]] = call <27 x i16> @llvm.vector.insert.v27i16.v2i16(<27 x i16> [[TMP53]], <2 x i16> [[TMP40]], i64 24) -; OPT-NEXT: [[TMP55:%.*]] = insertelement <27 x i16> [[TMP54]], i16 [[TMP41]], i64 26 -; OPT-NEXT: store <27 x i16> [[TMP55]], ptr [[OUT:%.*]], align 1 +; OPT-NEXT: [[TMP0:%.*]] = phi <2 x i16> [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE1]], [[ELSE]] ] +; OPT-NEXT: [[TMP1:%.*]] = phi <2 x i16> [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE3]], [[ELSE]] ] +; OPT-NEXT: [[TMP2:%.*]] = phi <2 x i16> [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE5]], [[ELSE]] ] +; OPT-NEXT: [[TMP3:%.*]] = phi <2 x i16> [ [[LARGEPHI_EXTRACTSLICE6]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE7]], [[ELSE]] ] +; OPT-NEXT: [[TMP4:%.*]] = phi <2 x i16> [ [[LARGEPHI_EXTRACTSLICE8]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE9]], [[ELSE]] ] +; OPT-NEXT: [[TMP5:%.*]] = phi <2 x i16> [ [[LARGEPHI_EXTRACTSLICE10]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE11]], [[ELSE]] ] +; OPT-NEXT: [[TMP6:%.*]] = phi <2 x i16> [ [[LARGEPHI_EXTRACTSLICE12]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE13]], [[ELSE]] ] +; OPT-NEXT: [[TMP7:%.*]] = phi <2 x i16> [ [[LARGEPHI_EXTRACTSLICE14]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE15]], [[ELSE]] ] +; OPT-NEXT: [[TMP8:%.*]] = phi <2 x i16> [ [[LARGEPHI_EXTRACTSLICE16]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE17]], [[ELSE]] ] +; OPT-NEXT: 
[[TMP9:%.*]] = phi <2 x i16> [ [[LARGEPHI_EXTRACTSLICE18]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE19]], [[ELSE]] ] +; OPT-NEXT: [[TMP10:%.*]] = phi <2 x i16> [ [[LARGEPHI_EXTRACTSLICE20]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE21]], [[ELSE]] ] +; OPT-NEXT: [[TMP11:%.*]] = phi <2 x i16> [ [[LARGEPHI_EXTRACTSLICE22]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE23]], [[ELSE]] ] +; OPT-NEXT: [[TMP12:%.*]] = phi <2 x i16> [ [[LARGEPHI_EXTRACTSLICE24]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE25]], [[ELSE]] ] +; OPT-NEXT: [[TMP13:%.*]] = phi i16 [ [[LARGEPHI_EXTRACTSLICE26]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE27]], [[ELSE]] ] +; OPT-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = call <27 x i16> @llvm.vector.insert.v27i16.v2i16(<27 x i16> poison, <2 x i16> [[TMP0]], i64 0) +; OPT-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = call <27 x i16> @llvm.vector.insert.v27i16.v2i16(<27 x i16> [[LARGEPHI_INSERTSLICE0]], <2 x i16> [[TMP1]], i64 2) +; OPT-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = call <27 x i16> @llvm.vector.insert.v27i16.v2i16(<27 x i16> [[LARGEPHI_INSERTSLICE1]], <2 x i16> [[TMP2]], i64 4) +; OPT-NEXT: [[LARGEPHI_INSERTSLICE3:%.*]] = call <27 x i16> @llvm.vector.insert.v27i16.v2i16(<27 x i16> [[LARGEPHI_INSERTSLICE2]], <2 x i16> [[TMP3]], i64 6) +; OPT-NEXT: [[LARGEPHI_INSERTSLICE4:%.*]] = call <27 x i16> @llvm.vector.insert.v27i16.v2i16(<27 x i16> [[LARGEPHI_INSERTSLICE3]], <2 x i16> [[TMP4]], i64 8) +; OPT-NEXT: [[LARGEPHI_INSERTSLICE5:%.*]] = call <27 x i16> @llvm.vector.insert.v27i16.v2i16(<27 x i16> [[LARGEPHI_INSERTSLICE4]], <2 x i16> [[TMP5]], i64 10) +; OPT-NEXT: [[LARGEPHI_INSERTSLICE6:%.*]] = call <27 x i16> @llvm.vector.insert.v27i16.v2i16(<27 x i16> [[LARGEPHI_INSERTSLICE5]], <2 x i16> [[TMP6]], i64 12) +; OPT-NEXT: [[LARGEPHI_INSERTSLICE7:%.*]] = call <27 x i16> @llvm.vector.insert.v27i16.v2i16(<27 x i16> [[LARGEPHI_INSERTSLICE6]], <2 x i16> [[TMP7]], i64 14) +; OPT-NEXT: [[LARGEPHI_INSERTSLICE8:%.*]] = call <27 x i16> @llvm.vector.insert.v27i16.v2i16(<27 x i16> 
[[LARGEPHI_INSERTSLICE7]], <2 x i16> [[TMP8]], i64 16) +; OPT-NEXT: [[LARGEPHI_INSERTSLICE9:%.*]] = call <27 x i16> @llvm.vector.insert.v27i16.v2i16(<27 x i16> [[LARGEPHI_INSERTSLICE8]], <2 x i16> [[TMP9]], i64 18) +; OPT-NEXT: [[LARGEPHI_INSERTSLICE10:%.*]] = call <27 x i16> @llvm.vector.insert.v27i16.v2i16(<27 x i16> [[LARGEPHI_INSERTSLICE9]], <2 x i16> [[TMP10]], i64 20) +; OPT-NEXT: [[LARGEPHI_INSERTSLICE11:%.*]] = call <27 x i16> @llvm.vector.insert.v27i16.v2i16(<27 x i16> [[LARGEPHI_INSERTSLICE10]], <2 x i16> [[TMP11]], i64 22) +; OPT-NEXT: [[LARGEPHI_INSERTSLICE12:%.*]] = call <27 x i16> @llvm.vector.insert.v27i16.v2i16(<27 x i16> [[LARGEPHI_INSERTSLICE11]], <2 x i16> [[TMP12]], i64 24) +; OPT-NEXT: [[LARGEPHI_INSERTSLICE13:%.*]] = insertelement <27 x i16> [[LARGEPHI_INSERTSLICE12]], i16 [[TMP13]], i64 26 +; OPT-NEXT: store <27 x i16> [[LARGEPHI_INSERTSLICE13]], ptr [[OUT:%.*]], align 1 ; OPT-NEXT: ret void ; ; NOOPT-LABEL: @phi_v27i16( @@ -494,44 +494,44 @@ ; OPT-NEXT: br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]] ; OPT: then: ; OPT-NEXT: [[X:%.*]] = insertelement <23 x i8> [[IN:%.*]], i8 42, i32 3 -; OPT-NEXT: [[TMP0:%.*]] = shufflevector <23 x i8> [[X]], <23 x i8> poison, <4 x i32> -; OPT-NEXT: [[TMP1:%.*]] = shufflevector <23 x i8> [[X]], <23 x i8> poison, <4 x i32> -; OPT-NEXT: [[TMP2:%.*]] = shufflevector <23 x i8> [[X]], <23 x i8> poison, <4 x i32> -; OPT-NEXT: [[TMP3:%.*]] = shufflevector <23 x i8> [[X]], <23 x i8> poison, <4 x i32> -; OPT-NEXT: [[TMP4:%.*]] = shufflevector <23 x i8> [[X]], <23 x i8> poison, <4 x i32> -; OPT-NEXT: [[TMP5:%.*]] = extractelement <23 x i8> [[X]], i64 20 -; OPT-NEXT: [[TMP6:%.*]] = extractelement <23 x i8> [[X]], i64 21 -; OPT-NEXT: [[TMP7:%.*]] = extractelement <23 x i8> [[X]], i64 22 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE0:%.*]] = shufflevector <23 x i8> [[X]], <23 x i8> poison, <4 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = shufflevector <23 x i8> [[X]], <23 x i8> poison, <4 x i32> +; OPT-NEXT: 
[[LARGEPHI_EXTRACTSLICE4:%.*]] = shufflevector <23 x i8> [[X]], <23 x i8> poison, <4 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE6:%.*]] = shufflevector <23 x i8> [[X]], <23 x i8> poison, <4 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE8:%.*]] = shufflevector <23 x i8> [[X]], <23 x i8> poison, <4 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE10:%.*]] = extractelement <23 x i8> [[X]], i64 20 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE12:%.*]] = extractelement <23 x i8> [[X]], i64 21 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE14:%.*]] = extractelement <23 x i8> [[X]], i64 22 ; OPT-NEXT: br label [[FINALLY:%.*]] ; OPT: else: ; OPT-NEXT: [[Y:%.*]] = insertelement <23 x i8> [[IN]], i8 64, i32 6 -; OPT-NEXT: [[TMP8:%.*]] = shufflevector <23 x i8> [[Y]], <23 x i8> poison, <4 x i32> -; OPT-NEXT: [[TMP9:%.*]] = shufflevector <23 x i8> [[Y]], <23 x i8> poison, <4 x i32> -; OPT-NEXT: [[TMP10:%.*]] = shufflevector <23 x i8> [[Y]], <23 x i8> poison, <4 x i32> -; OPT-NEXT: [[TMP11:%.*]] = shufflevector <23 x i8> [[Y]], <23 x i8> poison, <4 x i32> -; OPT-NEXT: [[TMP12:%.*]] = shufflevector <23 x i8> [[Y]], <23 x i8> poison, <4 x i32> -; OPT-NEXT: [[TMP13:%.*]] = extractelement <23 x i8> [[Y]], i64 20 -; OPT-NEXT: [[TMP14:%.*]] = extractelement <23 x i8> [[Y]], i64 21 -; OPT-NEXT: [[TMP15:%.*]] = extractelement <23 x i8> [[Y]], i64 22 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = shufflevector <23 x i8> [[Y]], <23 x i8> poison, <4 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = shufflevector <23 x i8> [[Y]], <23 x i8> poison, <4 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE5:%.*]] = shufflevector <23 x i8> [[Y]], <23 x i8> poison, <4 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE7:%.*]] = shufflevector <23 x i8> [[Y]], <23 x i8> poison, <4 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE9:%.*]] = shufflevector <23 x i8> [[Y]], <23 x i8> poison, <4 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE11:%.*]] = extractelement <23 x i8> [[Y]], i64 20 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE13:%.*]] = extractelement <23 x 
i8> [[Y]], i64 21 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE15:%.*]] = extractelement <23 x i8> [[Y]], i64 22 ; OPT-NEXT: br label [[FINALLY]] ; OPT: finally: -; OPT-NEXT: [[TMP16:%.*]] = phi <4 x i8> [ [[TMP0]], [[THEN]] ], [ [[TMP8]], [[ELSE]] ] -; OPT-NEXT: [[TMP17:%.*]] = phi <4 x i8> [ [[TMP1]], [[THEN]] ], [ [[TMP9]], [[ELSE]] ] -; OPT-NEXT: [[TMP18:%.*]] = phi <4 x i8> [ [[TMP2]], [[THEN]] ], [ [[TMP10]], [[ELSE]] ] -; OPT-NEXT: [[TMP19:%.*]] = phi <4 x i8> [ [[TMP3]], [[THEN]] ], [ [[TMP11]], [[ELSE]] ] -; OPT-NEXT: [[TMP20:%.*]] = phi <4 x i8> [ [[TMP4]], [[THEN]] ], [ [[TMP12]], [[ELSE]] ] -; OPT-NEXT: [[TMP21:%.*]] = phi i8 [ [[TMP5]], [[THEN]] ], [ [[TMP13]], [[ELSE]] ] -; OPT-NEXT: [[TMP22:%.*]] = phi i8 [ [[TMP6]], [[THEN]] ], [ [[TMP14]], [[ELSE]] ] -; OPT-NEXT: [[TMP23:%.*]] = phi i8 [ [[TMP7]], [[THEN]] ], [ [[TMP15]], [[ELSE]] ] -; OPT-NEXT: [[TMP24:%.*]] = call <23 x i8> @llvm.vector.insert.v23i8.v4i8(<23 x i8> poison, <4 x i8> [[TMP16]], i64 0) -; OPT-NEXT: [[TMP25:%.*]] = call <23 x i8> @llvm.vector.insert.v23i8.v4i8(<23 x i8> [[TMP24]], <4 x i8> [[TMP17]], i64 4) -; OPT-NEXT: [[TMP26:%.*]] = call <23 x i8> @llvm.vector.insert.v23i8.v4i8(<23 x i8> [[TMP25]], <4 x i8> [[TMP18]], i64 8) -; OPT-NEXT: [[TMP27:%.*]] = call <23 x i8> @llvm.vector.insert.v23i8.v4i8(<23 x i8> [[TMP26]], <4 x i8> [[TMP19]], i64 12) -; OPT-NEXT: [[TMP28:%.*]] = call <23 x i8> @llvm.vector.insert.v23i8.v4i8(<23 x i8> [[TMP27]], <4 x i8> [[TMP20]], i64 16) -; OPT-NEXT: [[TMP29:%.*]] = insertelement <23 x i8> [[TMP28]], i8 [[TMP21]], i64 20 -; OPT-NEXT: [[TMP30:%.*]] = insertelement <23 x i8> [[TMP29]], i8 [[TMP22]], i64 21 -; OPT-NEXT: [[TMP31:%.*]] = insertelement <23 x i8> [[TMP30]], i8 [[TMP23]], i64 22 -; OPT-NEXT: store <23 x i8> [[TMP31]], ptr [[OUT:%.*]], align 1 +; OPT-NEXT: [[TMP0:%.*]] = phi <4 x i8> [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE1]], [[ELSE]] ] +; OPT-NEXT: [[TMP1:%.*]] = phi <4 x i8> [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ 
[[LARGEPHI_EXTRACTSLICE3]], [[ELSE]] ] +; OPT-NEXT: [[TMP2:%.*]] = phi <4 x i8> [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE5]], [[ELSE]] ] +; OPT-NEXT: [[TMP3:%.*]] = phi <4 x i8> [ [[LARGEPHI_EXTRACTSLICE6]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE7]], [[ELSE]] ] +; OPT-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[LARGEPHI_EXTRACTSLICE8]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE9]], [[ELSE]] ] +; OPT-NEXT: [[TMP5:%.*]] = phi i8 [ [[LARGEPHI_EXTRACTSLICE10]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE11]], [[ELSE]] ] +; OPT-NEXT: [[TMP6:%.*]] = phi i8 [ [[LARGEPHI_EXTRACTSLICE12]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE13]], [[ELSE]] ] +; OPT-NEXT: [[TMP7:%.*]] = phi i8 [ [[LARGEPHI_EXTRACTSLICE14]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE15]], [[ELSE]] ] +; OPT-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = call <23 x i8> @llvm.vector.insert.v23i8.v4i8(<23 x i8> poison, <4 x i8> [[TMP0]], i64 0) +; OPT-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = call <23 x i8> @llvm.vector.insert.v23i8.v4i8(<23 x i8> [[LARGEPHI_INSERTSLICE0]], <4 x i8> [[TMP1]], i64 4) +; OPT-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = call <23 x i8> @llvm.vector.insert.v23i8.v4i8(<23 x i8> [[LARGEPHI_INSERTSLICE1]], <4 x i8> [[TMP2]], i64 8) +; OPT-NEXT: [[LARGEPHI_INSERTSLICE3:%.*]] = call <23 x i8> @llvm.vector.insert.v23i8.v4i8(<23 x i8> [[LARGEPHI_INSERTSLICE2]], <4 x i8> [[TMP3]], i64 12) +; OPT-NEXT: [[LARGEPHI_INSERTSLICE4:%.*]] = call <23 x i8> @llvm.vector.insert.v23i8.v4i8(<23 x i8> [[LARGEPHI_INSERTSLICE3]], <4 x i8> [[TMP4]], i64 16) +; OPT-NEXT: [[LARGEPHI_INSERTSLICE5:%.*]] = insertelement <23 x i8> [[LARGEPHI_INSERTSLICE4]], i8 [[TMP5]], i64 20 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE6:%.*]] = insertelement <23 x i8> [[LARGEPHI_INSERTSLICE5]], i8 [[TMP6]], i64 21 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE7:%.*]] = insertelement <23 x i8> [[LARGEPHI_INSERTSLICE6]], i8 [[TMP7]], i64 22 +; OPT-NEXT: store <23 x i8> [[LARGEPHI_INSERTSLICE7]], ptr [[OUT:%.*]], align 1 ; OPT-NEXT: ret void ; ; NOOPT-LABEL: 
@phi_v23i8( @@ -571,33 +571,33 @@ ; OPT-NEXT: br label [[FINALLY:%.*]] ; OPT: else: ; OPT-NEXT: [[Y:%.*]] = insertelement <23 x i8> [[IN:%.*]], i8 64, i32 6 -; OPT-NEXT: [[TMP0:%.*]] = shufflevector <23 x i8> [[Y]], <23 x i8> poison, <4 x i32> -; OPT-NEXT: [[TMP1:%.*]] = shufflevector <23 x i8> [[Y]], <23 x i8> poison, <4 x i32> -; OPT-NEXT: [[TMP2:%.*]] = shufflevector <23 x i8> [[Y]], <23 x i8> poison, <4 x i32> -; OPT-NEXT: [[TMP3:%.*]] = shufflevector <23 x i8> [[Y]], <23 x i8> poison, <4 x i32> -; OPT-NEXT: [[TMP4:%.*]] = shufflevector <23 x i8> [[Y]], <23 x i8> poison, <4 x i32> -; OPT-NEXT: [[TMP5:%.*]] = extractelement <23 x i8> [[Y]], i64 20 -; OPT-NEXT: [[TMP6:%.*]] = extractelement <23 x i8> [[Y]], i64 21 -; OPT-NEXT: [[TMP7:%.*]] = extractelement <23 x i8> [[Y]], i64 22 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = shufflevector <23 x i8> [[Y]], <23 x i8> poison, <4 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = shufflevector <23 x i8> [[Y]], <23 x i8> poison, <4 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE5:%.*]] = shufflevector <23 x i8> [[Y]], <23 x i8> poison, <4 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE7:%.*]] = shufflevector <23 x i8> [[Y]], <23 x i8> poison, <4 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE9:%.*]] = shufflevector <23 x i8> [[Y]], <23 x i8> poison, <4 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE11:%.*]] = extractelement <23 x i8> [[Y]], i64 20 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE13:%.*]] = extractelement <23 x i8> [[Y]], i64 21 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE15:%.*]] = extractelement <23 x i8> [[Y]], i64 22 ; OPT-NEXT: br label [[FINALLY]] ; OPT: finally: -; OPT-NEXT: [[TMP8:%.*]] = phi <4 x i8> [ zeroinitializer, [[THEN]] ], [ [[TMP0]], [[ELSE]] ] -; OPT-NEXT: [[TMP9:%.*]] = phi <4 x i8> [ zeroinitializer, [[THEN]] ], [ [[TMP1]], [[ELSE]] ] -; OPT-NEXT: [[TMP10:%.*]] = phi <4 x i8> [ zeroinitializer, [[THEN]] ], [ [[TMP2]], [[ELSE]] ] -; OPT-NEXT: [[TMP11:%.*]] = phi <4 x i8> [ zeroinitializer, [[THEN]] ], [ [[TMP3]], 
[[ELSE]] ] -; OPT-NEXT: [[TMP12:%.*]] = phi <4 x i8> [ zeroinitializer, [[THEN]] ], [ [[TMP4]], [[ELSE]] ] -; OPT-NEXT: [[TMP13:%.*]] = phi i8 [ 0, [[THEN]] ], [ [[TMP5]], [[ELSE]] ] -; OPT-NEXT: [[TMP14:%.*]] = phi i8 [ 0, [[THEN]] ], [ [[TMP6]], [[ELSE]] ] -; OPT-NEXT: [[TMP15:%.*]] = phi i8 [ 0, [[THEN]] ], [ [[TMP7]], [[ELSE]] ] -; OPT-NEXT: [[TMP16:%.*]] = call <23 x i8> @llvm.vector.insert.v23i8.v4i8(<23 x i8> poison, <4 x i8> [[TMP8]], i64 0) -; OPT-NEXT: [[TMP17:%.*]] = call <23 x i8> @llvm.vector.insert.v23i8.v4i8(<23 x i8> [[TMP16]], <4 x i8> [[TMP9]], i64 4) -; OPT-NEXT: [[TMP18:%.*]] = call <23 x i8> @llvm.vector.insert.v23i8.v4i8(<23 x i8> [[TMP17]], <4 x i8> [[TMP10]], i64 8) -; OPT-NEXT: [[TMP19:%.*]] = call <23 x i8> @llvm.vector.insert.v23i8.v4i8(<23 x i8> [[TMP18]], <4 x i8> [[TMP11]], i64 12) -; OPT-NEXT: [[TMP20:%.*]] = call <23 x i8> @llvm.vector.insert.v23i8.v4i8(<23 x i8> [[TMP19]], <4 x i8> [[TMP12]], i64 16) -; OPT-NEXT: [[TMP21:%.*]] = insertelement <23 x i8> [[TMP20]], i8 [[TMP13]], i64 20 -; OPT-NEXT: [[TMP22:%.*]] = insertelement <23 x i8> [[TMP21]], i8 [[TMP14]], i64 21 -; OPT-NEXT: [[TMP23:%.*]] = insertelement <23 x i8> [[TMP22]], i8 [[TMP15]], i64 22 -; OPT-NEXT: store <23 x i8> [[TMP23]], ptr [[OUT:%.*]], align 1 +; OPT-NEXT: [[TMP0:%.*]] = phi <4 x i8> [ zeroinitializer, [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE1]], [[ELSE]] ] +; OPT-NEXT: [[TMP1:%.*]] = phi <4 x i8> [ zeroinitializer, [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE3]], [[ELSE]] ] +; OPT-NEXT: [[TMP2:%.*]] = phi <4 x i8> [ zeroinitializer, [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE5]], [[ELSE]] ] +; OPT-NEXT: [[TMP3:%.*]] = phi <4 x i8> [ zeroinitializer, [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE7]], [[ELSE]] ] +; OPT-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ zeroinitializer, [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE9]], [[ELSE]] ] +; OPT-NEXT: [[TMP5:%.*]] = phi i8 [ 0, [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE11]], [[ELSE]] ] +; OPT-NEXT: [[TMP6:%.*]] = phi i8 [ 0, [[THEN]] ], [ 
[[LARGEPHI_EXTRACTSLICE13]], [[ELSE]] ] +; OPT-NEXT: [[TMP7:%.*]] = phi i8 [ 0, [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE15]], [[ELSE]] ] +; OPT-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = call <23 x i8> @llvm.vector.insert.v23i8.v4i8(<23 x i8> poison, <4 x i8> [[TMP0]], i64 0) +; OPT-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = call <23 x i8> @llvm.vector.insert.v23i8.v4i8(<23 x i8> [[LARGEPHI_INSERTSLICE0]], <4 x i8> [[TMP1]], i64 4) +; OPT-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = call <23 x i8> @llvm.vector.insert.v23i8.v4i8(<23 x i8> [[LARGEPHI_INSERTSLICE1]], <4 x i8> [[TMP2]], i64 8) +; OPT-NEXT: [[LARGEPHI_INSERTSLICE3:%.*]] = call <23 x i8> @llvm.vector.insert.v23i8.v4i8(<23 x i8> [[LARGEPHI_INSERTSLICE2]], <4 x i8> [[TMP3]], i64 12) +; OPT-NEXT: [[LARGEPHI_INSERTSLICE4:%.*]] = call <23 x i8> @llvm.vector.insert.v23i8.v4i8(<23 x i8> [[LARGEPHI_INSERTSLICE3]], <4 x i8> [[TMP4]], i64 16) +; OPT-NEXT: [[LARGEPHI_INSERTSLICE5:%.*]] = insertelement <23 x i8> [[LARGEPHI_INSERTSLICE4]], i8 [[TMP5]], i64 20 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE6:%.*]] = insertelement <23 x i8> [[LARGEPHI_INSERTSLICE5]], i8 [[TMP6]], i64 21 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE7:%.*]] = insertelement <23 x i8> [[LARGEPHI_INSERTSLICE6]], i8 [[TMP7]], i64 22 +; OPT-NEXT: store <23 x i8> [[LARGEPHI_INSERTSLICE7]], ptr [[OUT:%.*]], align 1 ; OPT-NEXT: ret void ; ; NOOPT-LABEL: @phi_v23i8_zeroinit( @@ -634,27 +634,27 @@ ; OPT-NEXT: br label [[FINALLY:%.*]] ; OPT: else: ; OPT-NEXT: [[Y:%.*]] = insertelement <15 x i8> [[IN:%.*]], i8 64, i32 6 -; OPT-NEXT: [[TMP0:%.*]] = shufflevector <15 x i8> [[Y]], <15 x i8> poison, <4 x i32> -; OPT-NEXT: [[TMP1:%.*]] = shufflevector <15 x i8> [[Y]], <15 x i8> poison, <4 x i32> -; OPT-NEXT: [[TMP2:%.*]] = shufflevector <15 x i8> [[Y]], <15 x i8> poison, <4 x i32> -; OPT-NEXT: [[TMP3:%.*]] = extractelement <15 x i8> [[Y]], i64 12 -; OPT-NEXT: [[TMP4:%.*]] = extractelement <15 x i8> [[Y]], i64 13 -; OPT-NEXT: [[TMP5:%.*]] = extractelement <15 x i8> [[Y]], i64 14 +; OPT-NEXT: 
[[LARGEPHI_EXTRACTSLICE1:%.*]] = shufflevector <15 x i8> [[Y]], <15 x i8> poison, <4 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = shufflevector <15 x i8> [[Y]], <15 x i8> poison, <4 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE5:%.*]] = shufflevector <15 x i8> [[Y]], <15 x i8> poison, <4 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE7:%.*]] = extractelement <15 x i8> [[Y]], i64 12 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE9:%.*]] = extractelement <15 x i8> [[Y]], i64 13 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE11:%.*]] = extractelement <15 x i8> [[Y]], i64 14 ; OPT-NEXT: br label [[FINALLY]] ; OPT: finally: -; OPT-NEXT: [[TMP6:%.*]] = phi <4 x i8> [ , [[THEN]] ], [ [[TMP0]], [[ELSE]] ] -; OPT-NEXT: [[TMP7:%.*]] = phi <4 x i8> [ , [[THEN]] ], [ [[TMP1]], [[ELSE]] ] -; OPT-NEXT: [[TMP8:%.*]] = phi <4 x i8> [ , [[THEN]] ], [ [[TMP2]], [[ELSE]] ] -; OPT-NEXT: [[TMP9:%.*]] = phi i8 [ 13, [[THEN]] ], [ [[TMP3]], [[ELSE]] ] -; OPT-NEXT: [[TMP10:%.*]] = phi i8 [ 14, [[THEN]] ], [ [[TMP4]], [[ELSE]] ] -; OPT-NEXT: [[TMP11:%.*]] = phi i8 [ undef, [[THEN]] ], [ [[TMP5]], [[ELSE]] ] -; OPT-NEXT: [[TMP12:%.*]] = call <15 x i8> @llvm.vector.insert.v15i8.v4i8(<15 x i8> poison, <4 x i8> [[TMP6]], i64 0) -; OPT-NEXT: [[TMP13:%.*]] = call <15 x i8> @llvm.vector.insert.v15i8.v4i8(<15 x i8> [[TMP12]], <4 x i8> [[TMP7]], i64 4) -; OPT-NEXT: [[TMP14:%.*]] = call <15 x i8> @llvm.vector.insert.v15i8.v4i8(<15 x i8> [[TMP13]], <4 x i8> [[TMP8]], i64 8) -; OPT-NEXT: [[TMP15:%.*]] = insertelement <15 x i8> [[TMP14]], i8 [[TMP9]], i64 12 -; OPT-NEXT: [[TMP16:%.*]] = insertelement <15 x i8> [[TMP15]], i8 [[TMP10]], i64 13 -; OPT-NEXT: [[TMP17:%.*]] = insertelement <15 x i8> [[TMP16]], i8 [[TMP11]], i64 14 -; OPT-NEXT: store <15 x i8> [[TMP17]], ptr [[OUT:%.*]], align 1 +; OPT-NEXT: [[TMP0:%.*]] = phi <4 x i8> [ , [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE1]], [[ELSE]] ] +; OPT-NEXT: [[TMP1:%.*]] = phi <4 x i8> [ , [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE3]], [[ELSE]] ] +; OPT-NEXT: [[TMP2:%.*]] = phi <4 x i8> 
[ , [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE5]], [[ELSE]] ] +; OPT-NEXT: [[TMP3:%.*]] = phi i8 [ 13, [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE7]], [[ELSE]] ] +; OPT-NEXT: [[TMP4:%.*]] = phi i8 [ 14, [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE9]], [[ELSE]] ] +; OPT-NEXT: [[TMP5:%.*]] = phi i8 [ undef, [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE11]], [[ELSE]] ] +; OPT-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = call <15 x i8> @llvm.vector.insert.v15i8.v4i8(<15 x i8> poison, <4 x i8> [[TMP0]], i64 0) +; OPT-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = call <15 x i8> @llvm.vector.insert.v15i8.v4i8(<15 x i8> [[LARGEPHI_INSERTSLICE0]], <4 x i8> [[TMP1]], i64 4) +; OPT-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = call <15 x i8> @llvm.vector.insert.v15i8.v4i8(<15 x i8> [[LARGEPHI_INSERTSLICE1]], <4 x i8> [[TMP2]], i64 8) +; OPT-NEXT: [[LARGEPHI_INSERTSLICE3:%.*]] = insertelement <15 x i8> [[LARGEPHI_INSERTSLICE2]], i8 [[TMP3]], i64 12 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE4:%.*]] = insertelement <15 x i8> [[LARGEPHI_INSERTSLICE3]], i8 [[TMP4]], i64 13 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE5:%.*]] = insertelement <15 x i8> [[LARGEPHI_INSERTSLICE4]], i8 [[TMP5]], i64 14 +; OPT-NEXT: store <15 x i8> [[LARGEPHI_INSERTSLICE5]], ptr [[OUT:%.*]], align 1 ; OPT-NEXT: ret void ; ; NOOPT-LABEL: @phi_v15i8_random_constant_init( @@ -689,104 +689,104 @@ ; OPT-NEXT: br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]] ; OPT: then: ; OPT-NEXT: [[X:%.*]] = insertelement <23 x i32> [[IN:%.*]], i32 42, i32 3 -; OPT-NEXT: [[TMP0:%.*]] = extractelement <23 x i32> [[X]], i64 0 -; OPT-NEXT: [[TMP1:%.*]] = extractelement <23 x i32> [[X]], i64 1 -; OPT-NEXT: [[TMP2:%.*]] = extractelement <23 x i32> [[X]], i64 2 -; OPT-NEXT: [[TMP3:%.*]] = extractelement <23 x i32> [[X]], i64 3 -; OPT-NEXT: [[TMP4:%.*]] = extractelement <23 x i32> [[X]], i64 4 -; OPT-NEXT: [[TMP5:%.*]] = extractelement <23 x i32> [[X]], i64 5 -; OPT-NEXT: [[TMP6:%.*]] = extractelement <23 x i32> [[X]], i64 6 -; OPT-NEXT: [[TMP7:%.*]] = extractelement <23 x i32> [[X]], 
i64 7 -; OPT-NEXT: [[TMP8:%.*]] = extractelement <23 x i32> [[X]], i64 8 -; OPT-NEXT: [[TMP9:%.*]] = extractelement <23 x i32> [[X]], i64 9 -; OPT-NEXT: [[TMP10:%.*]] = extractelement <23 x i32> [[X]], i64 10 -; OPT-NEXT: [[TMP11:%.*]] = extractelement <23 x i32> [[X]], i64 11 -; OPT-NEXT: [[TMP12:%.*]] = extractelement <23 x i32> [[X]], i64 12 -; OPT-NEXT: [[TMP13:%.*]] = extractelement <23 x i32> [[X]], i64 13 -; OPT-NEXT: [[TMP14:%.*]] = extractelement <23 x i32> [[X]], i64 14 -; OPT-NEXT: [[TMP15:%.*]] = extractelement <23 x i32> [[X]], i64 15 -; OPT-NEXT: [[TMP16:%.*]] = extractelement <23 x i32> [[X]], i64 16 -; OPT-NEXT: [[TMP17:%.*]] = extractelement <23 x i32> [[X]], i64 17 -; OPT-NEXT: [[TMP18:%.*]] = extractelement <23 x i32> [[X]], i64 18 -; OPT-NEXT: [[TMP19:%.*]] = extractelement <23 x i32> [[X]], i64 19 -; OPT-NEXT: [[TMP20:%.*]] = extractelement <23 x i32> [[X]], i64 20 -; OPT-NEXT: [[TMP21:%.*]] = extractelement <23 x i32> [[X]], i64 21 -; OPT-NEXT: [[TMP22:%.*]] = extractelement <23 x i32> [[X]], i64 22 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <23 x i32> [[X]], i64 0 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <23 x i32> [[X]], i64 1 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <23 x i32> [[X]], i64 2 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE6:%.*]] = extractelement <23 x i32> [[X]], i64 3 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE8:%.*]] = extractelement <23 x i32> [[X]], i64 4 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE10:%.*]] = extractelement <23 x i32> [[X]], i64 5 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE12:%.*]] = extractelement <23 x i32> [[X]], i64 6 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE14:%.*]] = extractelement <23 x i32> [[X]], i64 7 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE16:%.*]] = extractelement <23 x i32> [[X]], i64 8 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE18:%.*]] = extractelement <23 x i32> [[X]], i64 9 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE20:%.*]] = extractelement <23 x i32> [[X]], i64 10 +; OPT-NEXT: 
[[LARGEPHI_EXTRACTSLICE22:%.*]] = extractelement <23 x i32> [[X]], i64 11 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE24:%.*]] = extractelement <23 x i32> [[X]], i64 12 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE26:%.*]] = extractelement <23 x i32> [[X]], i64 13 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE28:%.*]] = extractelement <23 x i32> [[X]], i64 14 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE30:%.*]] = extractelement <23 x i32> [[X]], i64 15 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE32:%.*]] = extractelement <23 x i32> [[X]], i64 16 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE34:%.*]] = extractelement <23 x i32> [[X]], i64 17 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE36:%.*]] = extractelement <23 x i32> [[X]], i64 18 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE38:%.*]] = extractelement <23 x i32> [[X]], i64 19 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE40:%.*]] = extractelement <23 x i32> [[X]], i64 20 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE42:%.*]] = extractelement <23 x i32> [[X]], i64 21 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE44:%.*]] = extractelement <23 x i32> [[X]], i64 22 ; OPT-NEXT: br label [[FINALLY:%.*]] ; OPT: else: ; OPT-NEXT: [[Y:%.*]] = insertelement <23 x i32> [[IN]], i32 64, i32 6 -; OPT-NEXT: [[TMP23:%.*]] = extractelement <23 x i32> [[Y]], i64 0 -; OPT-NEXT: [[TMP24:%.*]] = extractelement <23 x i32> [[Y]], i64 1 -; OPT-NEXT: [[TMP25:%.*]] = extractelement <23 x i32> [[Y]], i64 2 -; OPT-NEXT: [[TMP26:%.*]] = extractelement <23 x i32> [[Y]], i64 3 -; OPT-NEXT: [[TMP27:%.*]] = extractelement <23 x i32> [[Y]], i64 4 -; OPT-NEXT: [[TMP28:%.*]] = extractelement <23 x i32> [[Y]], i64 5 -; OPT-NEXT: [[TMP29:%.*]] = extractelement <23 x i32> [[Y]], i64 6 -; OPT-NEXT: [[TMP30:%.*]] = extractelement <23 x i32> [[Y]], i64 7 -; OPT-NEXT: [[TMP31:%.*]] = extractelement <23 x i32> [[Y]], i64 8 -; OPT-NEXT: [[TMP32:%.*]] = extractelement <23 x i32> [[Y]], i64 9 -; OPT-NEXT: [[TMP33:%.*]] = extractelement <23 x i32> [[Y]], i64 10 -; OPT-NEXT: [[TMP34:%.*]] = extractelement <23 x i32> [[Y]], i64 11 -; OPT-NEXT: [[TMP35:%.*]] = 
extractelement <23 x i32> [[Y]], i64 12 -; OPT-NEXT: [[TMP36:%.*]] = extractelement <23 x i32> [[Y]], i64 13 -; OPT-NEXT: [[TMP37:%.*]] = extractelement <23 x i32> [[Y]], i64 14 -; OPT-NEXT: [[TMP38:%.*]] = extractelement <23 x i32> [[Y]], i64 15 -; OPT-NEXT: [[TMP39:%.*]] = extractelement <23 x i32> [[Y]], i64 16 -; OPT-NEXT: [[TMP40:%.*]] = extractelement <23 x i32> [[Y]], i64 17 -; OPT-NEXT: [[TMP41:%.*]] = extractelement <23 x i32> [[Y]], i64 18 -; OPT-NEXT: [[TMP42:%.*]] = extractelement <23 x i32> [[Y]], i64 19 -; OPT-NEXT: [[TMP43:%.*]] = extractelement <23 x i32> [[Y]], i64 20 -; OPT-NEXT: [[TMP44:%.*]] = extractelement <23 x i32> [[Y]], i64 21 -; OPT-NEXT: [[TMP45:%.*]] = extractelement <23 x i32> [[Y]], i64 22 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <23 x i32> [[Y]], i64 0 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <23 x i32> [[Y]], i64 1 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE5:%.*]] = extractelement <23 x i32> [[Y]], i64 2 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE7:%.*]] = extractelement <23 x i32> [[Y]], i64 3 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE9:%.*]] = extractelement <23 x i32> [[Y]], i64 4 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE11:%.*]] = extractelement <23 x i32> [[Y]], i64 5 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE13:%.*]] = extractelement <23 x i32> [[Y]], i64 6 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE15:%.*]] = extractelement <23 x i32> [[Y]], i64 7 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE17:%.*]] = extractelement <23 x i32> [[Y]], i64 8 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE19:%.*]] = extractelement <23 x i32> [[Y]], i64 9 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE21:%.*]] = extractelement <23 x i32> [[Y]], i64 10 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE23:%.*]] = extractelement <23 x i32> [[Y]], i64 11 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE25:%.*]] = extractelement <23 x i32> [[Y]], i64 12 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE27:%.*]] = extractelement <23 x i32> [[Y]], i64 13 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE29:%.*]] = extractelement <23 
x i32> [[Y]], i64 14 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE31:%.*]] = extractelement <23 x i32> [[Y]], i64 15 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE33:%.*]] = extractelement <23 x i32> [[Y]], i64 16 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE35:%.*]] = extractelement <23 x i32> [[Y]], i64 17 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE37:%.*]] = extractelement <23 x i32> [[Y]], i64 18 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE39:%.*]] = extractelement <23 x i32> [[Y]], i64 19 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE41:%.*]] = extractelement <23 x i32> [[Y]], i64 20 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE43:%.*]] = extractelement <23 x i32> [[Y]], i64 21 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE45:%.*]] = extractelement <23 x i32> [[Y]], i64 22 ; OPT-NEXT: br label [[FINALLY]] ; OPT: finally: -; OPT-NEXT: [[TMP46:%.*]] = phi i32 [ [[TMP0]], [[THEN]] ], [ [[TMP23]], [[ELSE]] ] -; OPT-NEXT: [[TMP47:%.*]] = phi i32 [ [[TMP1]], [[THEN]] ], [ [[TMP24]], [[ELSE]] ] -; OPT-NEXT: [[TMP48:%.*]] = phi i32 [ [[TMP2]], [[THEN]] ], [ [[TMP25]], [[ELSE]] ] -; OPT-NEXT: [[TMP49:%.*]] = phi i32 [ [[TMP3]], [[THEN]] ], [ [[TMP26]], [[ELSE]] ] -; OPT-NEXT: [[TMP50:%.*]] = phi i32 [ [[TMP4]], [[THEN]] ], [ [[TMP27]], [[ELSE]] ] -; OPT-NEXT: [[TMP51:%.*]] = phi i32 [ [[TMP5]], [[THEN]] ], [ [[TMP28]], [[ELSE]] ] -; OPT-NEXT: [[TMP52:%.*]] = phi i32 [ [[TMP6]], [[THEN]] ], [ [[TMP29]], [[ELSE]] ] -; OPT-NEXT: [[TMP53:%.*]] = phi i32 [ [[TMP7]], [[THEN]] ], [ [[TMP30]], [[ELSE]] ] -; OPT-NEXT: [[TMP54:%.*]] = phi i32 [ [[TMP8]], [[THEN]] ], [ [[TMP31]], [[ELSE]] ] -; OPT-NEXT: [[TMP55:%.*]] = phi i32 [ [[TMP9]], [[THEN]] ], [ [[TMP32]], [[ELSE]] ] -; OPT-NEXT: [[TMP56:%.*]] = phi i32 [ [[TMP10]], [[THEN]] ], [ [[TMP33]], [[ELSE]] ] -; OPT-NEXT: [[TMP57:%.*]] = phi i32 [ [[TMP11]], [[THEN]] ], [ [[TMP34]], [[ELSE]] ] -; OPT-NEXT: [[TMP58:%.*]] = phi i32 [ [[TMP12]], [[THEN]] ], [ [[TMP35]], [[ELSE]] ] -; OPT-NEXT: [[TMP59:%.*]] = phi i32 [ [[TMP13]], [[THEN]] ], [ [[TMP36]], [[ELSE]] ] -; OPT-NEXT: [[TMP60:%.*]] = phi i32 [ 
[[TMP14]], [[THEN]] ], [ [[TMP37]], [[ELSE]] ] -; OPT-NEXT: [[TMP61:%.*]] = phi i32 [ [[TMP15]], [[THEN]] ], [ [[TMP38]], [[ELSE]] ] -; OPT-NEXT: [[TMP62:%.*]] = phi i32 [ [[TMP16]], [[THEN]] ], [ [[TMP39]], [[ELSE]] ] -; OPT-NEXT: [[TMP63:%.*]] = phi i32 [ [[TMP17]], [[THEN]] ], [ [[TMP40]], [[ELSE]] ] -; OPT-NEXT: [[TMP64:%.*]] = phi i32 [ [[TMP18]], [[THEN]] ], [ [[TMP41]], [[ELSE]] ] -; OPT-NEXT: [[TMP65:%.*]] = phi i32 [ [[TMP19]], [[THEN]] ], [ [[TMP42]], [[ELSE]] ] -; OPT-NEXT: [[TMP66:%.*]] = phi i32 [ [[TMP20]], [[THEN]] ], [ [[TMP43]], [[ELSE]] ] -; OPT-NEXT: [[TMP67:%.*]] = phi i32 [ [[TMP21]], [[THEN]] ], [ [[TMP44]], [[ELSE]] ] -; OPT-NEXT: [[TMP68:%.*]] = phi i32 [ [[TMP22]], [[THEN]] ], [ [[TMP45]], [[ELSE]] ] -; OPT-NEXT: [[TMP69:%.*]] = insertelement <23 x i32> poison, i32 [[TMP46]], i64 0 -; OPT-NEXT: [[TMP70:%.*]] = insertelement <23 x i32> [[TMP69]], i32 [[TMP47]], i64 1 -; OPT-NEXT: [[TMP71:%.*]] = insertelement <23 x i32> [[TMP70]], i32 [[TMP48]], i64 2 -; OPT-NEXT: [[TMP72:%.*]] = insertelement <23 x i32> [[TMP71]], i32 [[TMP49]], i64 3 -; OPT-NEXT: [[TMP73:%.*]] = insertelement <23 x i32> [[TMP72]], i32 [[TMP50]], i64 4 -; OPT-NEXT: [[TMP74:%.*]] = insertelement <23 x i32> [[TMP73]], i32 [[TMP51]], i64 5 -; OPT-NEXT: [[TMP75:%.*]] = insertelement <23 x i32> [[TMP74]], i32 [[TMP52]], i64 6 -; OPT-NEXT: [[TMP76:%.*]] = insertelement <23 x i32> [[TMP75]], i32 [[TMP53]], i64 7 -; OPT-NEXT: [[TMP77:%.*]] = insertelement <23 x i32> [[TMP76]], i32 [[TMP54]], i64 8 -; OPT-NEXT: [[TMP78:%.*]] = insertelement <23 x i32> [[TMP77]], i32 [[TMP55]], i64 9 -; OPT-NEXT: [[TMP79:%.*]] = insertelement <23 x i32> [[TMP78]], i32 [[TMP56]], i64 10 -; OPT-NEXT: [[TMP80:%.*]] = insertelement <23 x i32> [[TMP79]], i32 [[TMP57]], i64 11 -; OPT-NEXT: [[TMP81:%.*]] = insertelement <23 x i32> [[TMP80]], i32 [[TMP58]], i64 12 -; OPT-NEXT: [[TMP82:%.*]] = insertelement <23 x i32> [[TMP81]], i32 [[TMP59]], i64 13 -; OPT-NEXT: [[TMP83:%.*]] = insertelement <23 x i32> 
[[TMP82]], i32 [[TMP60]], i64 14 -; OPT-NEXT: [[TMP84:%.*]] = insertelement <23 x i32> [[TMP83]], i32 [[TMP61]], i64 15 -; OPT-NEXT: [[TMP85:%.*]] = insertelement <23 x i32> [[TMP84]], i32 [[TMP62]], i64 16 -; OPT-NEXT: [[TMP86:%.*]] = insertelement <23 x i32> [[TMP85]], i32 [[TMP63]], i64 17 -; OPT-NEXT: [[TMP87:%.*]] = insertelement <23 x i32> [[TMP86]], i32 [[TMP64]], i64 18 -; OPT-NEXT: [[TMP88:%.*]] = insertelement <23 x i32> [[TMP87]], i32 [[TMP65]], i64 19 -; OPT-NEXT: [[TMP89:%.*]] = insertelement <23 x i32> [[TMP88]], i32 [[TMP66]], i64 20 -; OPT-NEXT: [[TMP90:%.*]] = insertelement <23 x i32> [[TMP89]], i32 [[TMP67]], i64 21 -; OPT-NEXT: [[TMP91:%.*]] = insertelement <23 x i32> [[TMP90]], i32 [[TMP68]], i64 22 -; OPT-NEXT: store <23 x i32> [[TMP91]], ptr [[OUT:%.*]], align 1 +; OPT-NEXT: [[TMP0:%.*]] = phi i32 [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE1]], [[ELSE]] ] +; OPT-NEXT: [[TMP1:%.*]] = phi i32 [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE3]], [[ELSE]] ] +; OPT-NEXT: [[TMP2:%.*]] = phi i32 [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE5]], [[ELSE]] ] +; OPT-NEXT: [[TMP3:%.*]] = phi i32 [ [[LARGEPHI_EXTRACTSLICE6]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE7]], [[ELSE]] ] +; OPT-NEXT: [[TMP4:%.*]] = phi i32 [ [[LARGEPHI_EXTRACTSLICE8]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE9]], [[ELSE]] ] +; OPT-NEXT: [[TMP5:%.*]] = phi i32 [ [[LARGEPHI_EXTRACTSLICE10]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE11]], [[ELSE]] ] +; OPT-NEXT: [[TMP6:%.*]] = phi i32 [ [[LARGEPHI_EXTRACTSLICE12]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE13]], [[ELSE]] ] +; OPT-NEXT: [[TMP7:%.*]] = phi i32 [ [[LARGEPHI_EXTRACTSLICE14]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE15]], [[ELSE]] ] +; OPT-NEXT: [[TMP8:%.*]] = phi i32 [ [[LARGEPHI_EXTRACTSLICE16]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE17]], [[ELSE]] ] +; OPT-NEXT: [[TMP9:%.*]] = phi i32 [ [[LARGEPHI_EXTRACTSLICE18]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE19]], [[ELSE]] ] +; 
OPT-NEXT: [[TMP10:%.*]] = phi i32 [ [[LARGEPHI_EXTRACTSLICE20]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE21]], [[ELSE]] ] +; OPT-NEXT: [[TMP11:%.*]] = phi i32 [ [[LARGEPHI_EXTRACTSLICE22]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE23]], [[ELSE]] ] +; OPT-NEXT: [[TMP12:%.*]] = phi i32 [ [[LARGEPHI_EXTRACTSLICE24]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE25]], [[ELSE]] ] +; OPT-NEXT: [[TMP13:%.*]] = phi i32 [ [[LARGEPHI_EXTRACTSLICE26]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE27]], [[ELSE]] ] +; OPT-NEXT: [[TMP14:%.*]] = phi i32 [ [[LARGEPHI_EXTRACTSLICE28]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE29]], [[ELSE]] ] +; OPT-NEXT: [[TMP15:%.*]] = phi i32 [ [[LARGEPHI_EXTRACTSLICE30]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE31]], [[ELSE]] ] +; OPT-NEXT: [[TMP16:%.*]] = phi i32 [ [[LARGEPHI_EXTRACTSLICE32]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE33]], [[ELSE]] ] +; OPT-NEXT: [[TMP17:%.*]] = phi i32 [ [[LARGEPHI_EXTRACTSLICE34]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE35]], [[ELSE]] ] +; OPT-NEXT: [[TMP18:%.*]] = phi i32 [ [[LARGEPHI_EXTRACTSLICE36]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE37]], [[ELSE]] ] +; OPT-NEXT: [[TMP19:%.*]] = phi i32 [ [[LARGEPHI_EXTRACTSLICE38]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE39]], [[ELSE]] ] +; OPT-NEXT: [[TMP20:%.*]] = phi i32 [ [[LARGEPHI_EXTRACTSLICE40]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE41]], [[ELSE]] ] +; OPT-NEXT: [[TMP21:%.*]] = phi i32 [ [[LARGEPHI_EXTRACTSLICE42]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE43]], [[ELSE]] ] +; OPT-NEXT: [[TMP22:%.*]] = phi i32 [ [[LARGEPHI_EXTRACTSLICE44]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE45]], [[ELSE]] ] +; OPT-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <23 x i32> poison, i32 [[TMP0]], i64 0 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <23 x i32> [[LARGEPHI_INSERTSLICE0]], i32 [[TMP1]], i64 1 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <23 x i32> [[LARGEPHI_INSERTSLICE1]], i32 [[TMP2]], i64 2 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE3:%.*]] = insertelement <23 x i32> 
[[LARGEPHI_INSERTSLICE2]], i32 [[TMP3]], i64 3 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE4:%.*]] = insertelement <23 x i32> [[LARGEPHI_INSERTSLICE3]], i32 [[TMP4]], i64 4 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE5:%.*]] = insertelement <23 x i32> [[LARGEPHI_INSERTSLICE4]], i32 [[TMP5]], i64 5 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE6:%.*]] = insertelement <23 x i32> [[LARGEPHI_INSERTSLICE5]], i32 [[TMP6]], i64 6 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE7:%.*]] = insertelement <23 x i32> [[LARGEPHI_INSERTSLICE6]], i32 [[TMP7]], i64 7 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE8:%.*]] = insertelement <23 x i32> [[LARGEPHI_INSERTSLICE7]], i32 [[TMP8]], i64 8 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE9:%.*]] = insertelement <23 x i32> [[LARGEPHI_INSERTSLICE8]], i32 [[TMP9]], i64 9 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE10:%.*]] = insertelement <23 x i32> [[LARGEPHI_INSERTSLICE9]], i32 [[TMP10]], i64 10 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE11:%.*]] = insertelement <23 x i32> [[LARGEPHI_INSERTSLICE10]], i32 [[TMP11]], i64 11 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE12:%.*]] = insertelement <23 x i32> [[LARGEPHI_INSERTSLICE11]], i32 [[TMP12]], i64 12 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE13:%.*]] = insertelement <23 x i32> [[LARGEPHI_INSERTSLICE12]], i32 [[TMP13]], i64 13 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE14:%.*]] = insertelement <23 x i32> [[LARGEPHI_INSERTSLICE13]], i32 [[TMP14]], i64 14 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE15:%.*]] = insertelement <23 x i32> [[LARGEPHI_INSERTSLICE14]], i32 [[TMP15]], i64 15 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE16:%.*]] = insertelement <23 x i32> [[LARGEPHI_INSERTSLICE15]], i32 [[TMP16]], i64 16 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE17:%.*]] = insertelement <23 x i32> [[LARGEPHI_INSERTSLICE16]], i32 [[TMP17]], i64 17 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE18:%.*]] = insertelement <23 x i32> [[LARGEPHI_INSERTSLICE17]], i32 [[TMP18]], i64 18 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE19:%.*]] = insertelement <23 x i32> [[LARGEPHI_INSERTSLICE18]], i32 [[TMP19]], i64 19 +; OPT-NEXT: 
[[LARGEPHI_INSERTSLICE20:%.*]] = insertelement <23 x i32> [[LARGEPHI_INSERTSLICE19]], i32 [[TMP20]], i64 20 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE21:%.*]] = insertelement <23 x i32> [[LARGEPHI_INSERTSLICE20]], i32 [[TMP21]], i64 21 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE22:%.*]] = insertelement <23 x i32> [[LARGEPHI_INSERTSLICE21]], i32 [[TMP22]], i64 22 +; OPT-NEXT: store <23 x i32> [[LARGEPHI_INSERTSLICE22]], ptr [[OUT:%.*]], align 1 ; OPT-NEXT: ret void ; ; NOOPT-LABEL: @phi_v23i32( @@ -823,76 +823,76 @@ ; OPT-NEXT: br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]] ; OPT: then: ; OPT-NEXT: [[X:%.*]] = insertelement <16 x i64> [[IN:%.*]], i64 42, i32 3 -; OPT-NEXT: [[TMP0:%.*]] = extractelement <16 x i64> [[X]], i64 0 -; OPT-NEXT: [[TMP1:%.*]] = extractelement <16 x i64> [[X]], i64 1 -; OPT-NEXT: [[TMP2:%.*]] = extractelement <16 x i64> [[X]], i64 2 -; OPT-NEXT: [[TMP3:%.*]] = extractelement <16 x i64> [[X]], i64 3 -; OPT-NEXT: [[TMP4:%.*]] = extractelement <16 x i64> [[X]], i64 4 -; OPT-NEXT: [[TMP5:%.*]] = extractelement <16 x i64> [[X]], i64 5 -; OPT-NEXT: [[TMP6:%.*]] = extractelement <16 x i64> [[X]], i64 6 -; OPT-NEXT: [[TMP7:%.*]] = extractelement <16 x i64> [[X]], i64 7 -; OPT-NEXT: [[TMP8:%.*]] = extractelement <16 x i64> [[X]], i64 8 -; OPT-NEXT: [[TMP9:%.*]] = extractelement <16 x i64> [[X]], i64 9 -; OPT-NEXT: [[TMP10:%.*]] = extractelement <16 x i64> [[X]], i64 10 -; OPT-NEXT: [[TMP11:%.*]] = extractelement <16 x i64> [[X]], i64 11 -; OPT-NEXT: [[TMP12:%.*]] = extractelement <16 x i64> [[X]], i64 12 -; OPT-NEXT: [[TMP13:%.*]] = extractelement <16 x i64> [[X]], i64 13 -; OPT-NEXT: [[TMP14:%.*]] = extractelement <16 x i64> [[X]], i64 14 -; OPT-NEXT: [[TMP15:%.*]] = extractelement <16 x i64> [[X]], i64 15 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <16 x i64> [[X]], i64 0 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <16 x i64> [[X]], i64 1 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <16 x i64> [[X]], 
i64 2 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE6:%.*]] = extractelement <16 x i64> [[X]], i64 3 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE8:%.*]] = extractelement <16 x i64> [[X]], i64 4 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE10:%.*]] = extractelement <16 x i64> [[X]], i64 5 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE12:%.*]] = extractelement <16 x i64> [[X]], i64 6 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE14:%.*]] = extractelement <16 x i64> [[X]], i64 7 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE16:%.*]] = extractelement <16 x i64> [[X]], i64 8 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE18:%.*]] = extractelement <16 x i64> [[X]], i64 9 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE20:%.*]] = extractelement <16 x i64> [[X]], i64 10 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE22:%.*]] = extractelement <16 x i64> [[X]], i64 11 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE24:%.*]] = extractelement <16 x i64> [[X]], i64 12 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE26:%.*]] = extractelement <16 x i64> [[X]], i64 13 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE28:%.*]] = extractelement <16 x i64> [[X]], i64 14 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE30:%.*]] = extractelement <16 x i64> [[X]], i64 15 ; OPT-NEXT: br label [[FINALLY:%.*]] ; OPT: else: ; OPT-NEXT: [[Y:%.*]] = insertelement <16 x i64> [[IN]], i64 64, i32 6 -; OPT-NEXT: [[TMP16:%.*]] = extractelement <16 x i64> [[Y]], i64 0 -; OPT-NEXT: [[TMP17:%.*]] = extractelement <16 x i64> [[Y]], i64 1 -; OPT-NEXT: [[TMP18:%.*]] = extractelement <16 x i64> [[Y]], i64 2 -; OPT-NEXT: [[TMP19:%.*]] = extractelement <16 x i64> [[Y]], i64 3 -; OPT-NEXT: [[TMP20:%.*]] = extractelement <16 x i64> [[Y]], i64 4 -; OPT-NEXT: [[TMP21:%.*]] = extractelement <16 x i64> [[Y]], i64 5 -; OPT-NEXT: [[TMP22:%.*]] = extractelement <16 x i64> [[Y]], i64 6 -; OPT-NEXT: [[TMP23:%.*]] = extractelement <16 x i64> [[Y]], i64 7 -; OPT-NEXT: [[TMP24:%.*]] = extractelement <16 x i64> [[Y]], i64 8 -; OPT-NEXT: [[TMP25:%.*]] = extractelement <16 x i64> [[Y]], i64 9 -; OPT-NEXT: [[TMP26:%.*]] = extractelement <16 x i64> [[Y]], i64 10 
-; OPT-NEXT: [[TMP27:%.*]] = extractelement <16 x i64> [[Y]], i64 11 -; OPT-NEXT: [[TMP28:%.*]] = extractelement <16 x i64> [[Y]], i64 12 -; OPT-NEXT: [[TMP29:%.*]] = extractelement <16 x i64> [[Y]], i64 13 -; OPT-NEXT: [[TMP30:%.*]] = extractelement <16 x i64> [[Y]], i64 14 -; OPT-NEXT: [[TMP31:%.*]] = extractelement <16 x i64> [[Y]], i64 15 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <16 x i64> [[Y]], i64 0 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <16 x i64> [[Y]], i64 1 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE5:%.*]] = extractelement <16 x i64> [[Y]], i64 2 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE7:%.*]] = extractelement <16 x i64> [[Y]], i64 3 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE9:%.*]] = extractelement <16 x i64> [[Y]], i64 4 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE11:%.*]] = extractelement <16 x i64> [[Y]], i64 5 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE13:%.*]] = extractelement <16 x i64> [[Y]], i64 6 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE15:%.*]] = extractelement <16 x i64> [[Y]], i64 7 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE17:%.*]] = extractelement <16 x i64> [[Y]], i64 8 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE19:%.*]] = extractelement <16 x i64> [[Y]], i64 9 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE21:%.*]] = extractelement <16 x i64> [[Y]], i64 10 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE23:%.*]] = extractelement <16 x i64> [[Y]], i64 11 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE25:%.*]] = extractelement <16 x i64> [[Y]], i64 12 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE27:%.*]] = extractelement <16 x i64> [[Y]], i64 13 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE29:%.*]] = extractelement <16 x i64> [[Y]], i64 14 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE31:%.*]] = extractelement <16 x i64> [[Y]], i64 15 ; OPT-NEXT: br label [[FINALLY]] ; OPT: finally: -; OPT-NEXT: [[TMP32:%.*]] = phi i64 [ [[TMP0]], [[THEN]] ], [ [[TMP16]], [[ELSE]] ] -; OPT-NEXT: [[TMP33:%.*]] = phi i64 [ [[TMP1]], [[THEN]] ], [ [[TMP17]], [[ELSE]] ] -; OPT-NEXT: [[TMP34:%.*]] = phi i64 [ [[TMP2]], [[THEN]] 
], [ [[TMP18]], [[ELSE]] ] -; OPT-NEXT: [[TMP35:%.*]] = phi i64 [ [[TMP3]], [[THEN]] ], [ [[TMP19]], [[ELSE]] ] -; OPT-NEXT: [[TMP36:%.*]] = phi i64 [ [[TMP4]], [[THEN]] ], [ [[TMP20]], [[ELSE]] ] -; OPT-NEXT: [[TMP37:%.*]] = phi i64 [ [[TMP5]], [[THEN]] ], [ [[TMP21]], [[ELSE]] ] -; OPT-NEXT: [[TMP38:%.*]] = phi i64 [ [[TMP6]], [[THEN]] ], [ [[TMP22]], [[ELSE]] ] -; OPT-NEXT: [[TMP39:%.*]] = phi i64 [ [[TMP7]], [[THEN]] ], [ [[TMP23]], [[ELSE]] ] -; OPT-NEXT: [[TMP40:%.*]] = phi i64 [ [[TMP8]], [[THEN]] ], [ [[TMP24]], [[ELSE]] ] -; OPT-NEXT: [[TMP41:%.*]] = phi i64 [ [[TMP9]], [[THEN]] ], [ [[TMP25]], [[ELSE]] ] -; OPT-NEXT: [[TMP42:%.*]] = phi i64 [ [[TMP10]], [[THEN]] ], [ [[TMP26]], [[ELSE]] ] -; OPT-NEXT: [[TMP43:%.*]] = phi i64 [ [[TMP11]], [[THEN]] ], [ [[TMP27]], [[ELSE]] ] -; OPT-NEXT: [[TMP44:%.*]] = phi i64 [ [[TMP12]], [[THEN]] ], [ [[TMP28]], [[ELSE]] ] -; OPT-NEXT: [[TMP45:%.*]] = phi i64 [ [[TMP13]], [[THEN]] ], [ [[TMP29]], [[ELSE]] ] -; OPT-NEXT: [[TMP46:%.*]] = phi i64 [ [[TMP14]], [[THEN]] ], [ [[TMP30]], [[ELSE]] ] -; OPT-NEXT: [[TMP47:%.*]] = phi i64 [ [[TMP15]], [[THEN]] ], [ [[TMP31]], [[ELSE]] ] -; OPT-NEXT: [[TMP48:%.*]] = insertelement <16 x i64> poison, i64 [[TMP32]], i64 0 -; OPT-NEXT: [[TMP49:%.*]] = insertelement <16 x i64> [[TMP48]], i64 [[TMP33]], i64 1 -; OPT-NEXT: [[TMP50:%.*]] = insertelement <16 x i64> [[TMP49]], i64 [[TMP34]], i64 2 -; OPT-NEXT: [[TMP51:%.*]] = insertelement <16 x i64> [[TMP50]], i64 [[TMP35]], i64 3 -; OPT-NEXT: [[TMP52:%.*]] = insertelement <16 x i64> [[TMP51]], i64 [[TMP36]], i64 4 -; OPT-NEXT: [[TMP53:%.*]] = insertelement <16 x i64> [[TMP52]], i64 [[TMP37]], i64 5 -; OPT-NEXT: [[TMP54:%.*]] = insertelement <16 x i64> [[TMP53]], i64 [[TMP38]], i64 6 -; OPT-NEXT: [[TMP55:%.*]] = insertelement <16 x i64> [[TMP54]], i64 [[TMP39]], i64 7 -; OPT-NEXT: [[TMP56:%.*]] = insertelement <16 x i64> [[TMP55]], i64 [[TMP40]], i64 8 -; OPT-NEXT: [[TMP57:%.*]] = insertelement <16 x i64> [[TMP56]], i64 [[TMP41]], i64 9 -; 
OPT-NEXT: [[TMP58:%.*]] = insertelement <16 x i64> [[TMP57]], i64 [[TMP42]], i64 10 -; OPT-NEXT: [[TMP59:%.*]] = insertelement <16 x i64> [[TMP58]], i64 [[TMP43]], i64 11 -; OPT-NEXT: [[TMP60:%.*]] = insertelement <16 x i64> [[TMP59]], i64 [[TMP44]], i64 12 -; OPT-NEXT: [[TMP61:%.*]] = insertelement <16 x i64> [[TMP60]], i64 [[TMP45]], i64 13 -; OPT-NEXT: [[TMP62:%.*]] = insertelement <16 x i64> [[TMP61]], i64 [[TMP46]], i64 14 -; OPT-NEXT: [[TMP63:%.*]] = insertelement <16 x i64> [[TMP62]], i64 [[TMP47]], i64 15 -; OPT-NEXT: store <16 x i64> [[TMP63]], ptr [[OUT:%.*]], align 1 +; OPT-NEXT: [[TMP0:%.*]] = phi i64 [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE1]], [[ELSE]] ] +; OPT-NEXT: [[TMP1:%.*]] = phi i64 [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE3]], [[ELSE]] ] +; OPT-NEXT: [[TMP2:%.*]] = phi i64 [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE5]], [[ELSE]] ] +; OPT-NEXT: [[TMP3:%.*]] = phi i64 [ [[LARGEPHI_EXTRACTSLICE6]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE7]], [[ELSE]] ] +; OPT-NEXT: [[TMP4:%.*]] = phi i64 [ [[LARGEPHI_EXTRACTSLICE8]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE9]], [[ELSE]] ] +; OPT-NEXT: [[TMP5:%.*]] = phi i64 [ [[LARGEPHI_EXTRACTSLICE10]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE11]], [[ELSE]] ] +; OPT-NEXT: [[TMP6:%.*]] = phi i64 [ [[LARGEPHI_EXTRACTSLICE12]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE13]], [[ELSE]] ] +; OPT-NEXT: [[TMP7:%.*]] = phi i64 [ [[LARGEPHI_EXTRACTSLICE14]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE15]], [[ELSE]] ] +; OPT-NEXT: [[TMP8:%.*]] = phi i64 [ [[LARGEPHI_EXTRACTSLICE16]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE17]], [[ELSE]] ] +; OPT-NEXT: [[TMP9:%.*]] = phi i64 [ [[LARGEPHI_EXTRACTSLICE18]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE19]], [[ELSE]] ] +; OPT-NEXT: [[TMP10:%.*]] = phi i64 [ [[LARGEPHI_EXTRACTSLICE20]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE21]], [[ELSE]] ] +; OPT-NEXT: [[TMP11:%.*]] = phi i64 [ [[LARGEPHI_EXTRACTSLICE22]], [[THEN]] ], [ 
[[LARGEPHI_EXTRACTSLICE23]], [[ELSE]] ] +; OPT-NEXT: [[TMP12:%.*]] = phi i64 [ [[LARGEPHI_EXTRACTSLICE24]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE25]], [[ELSE]] ] +; OPT-NEXT: [[TMP13:%.*]] = phi i64 [ [[LARGEPHI_EXTRACTSLICE26]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE27]], [[ELSE]] ] +; OPT-NEXT: [[TMP14:%.*]] = phi i64 [ [[LARGEPHI_EXTRACTSLICE28]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE29]], [[ELSE]] ] +; OPT-NEXT: [[TMP15:%.*]] = phi i64 [ [[LARGEPHI_EXTRACTSLICE30]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE31]], [[ELSE]] ] +; OPT-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <16 x i64> poison, i64 [[TMP0]], i64 0 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <16 x i64> [[LARGEPHI_INSERTSLICE0]], i64 [[TMP1]], i64 1 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <16 x i64> [[LARGEPHI_INSERTSLICE1]], i64 [[TMP2]], i64 2 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE3:%.*]] = insertelement <16 x i64> [[LARGEPHI_INSERTSLICE2]], i64 [[TMP3]], i64 3 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE4:%.*]] = insertelement <16 x i64> [[LARGEPHI_INSERTSLICE3]], i64 [[TMP4]], i64 4 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE5:%.*]] = insertelement <16 x i64> [[LARGEPHI_INSERTSLICE4]], i64 [[TMP5]], i64 5 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE6:%.*]] = insertelement <16 x i64> [[LARGEPHI_INSERTSLICE5]], i64 [[TMP6]], i64 6 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE7:%.*]] = insertelement <16 x i64> [[LARGEPHI_INSERTSLICE6]], i64 [[TMP7]], i64 7 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE8:%.*]] = insertelement <16 x i64> [[LARGEPHI_INSERTSLICE7]], i64 [[TMP8]], i64 8 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE9:%.*]] = insertelement <16 x i64> [[LARGEPHI_INSERTSLICE8]], i64 [[TMP9]], i64 9 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE10:%.*]] = insertelement <16 x i64> [[LARGEPHI_INSERTSLICE9]], i64 [[TMP10]], i64 10 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE11:%.*]] = insertelement <16 x i64> [[LARGEPHI_INSERTSLICE10]], i64 [[TMP11]], i64 11 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE12:%.*]] = insertelement <16 x i64> 
[[LARGEPHI_INSERTSLICE11]], i64 [[TMP12]], i64 12 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE13:%.*]] = insertelement <16 x i64> [[LARGEPHI_INSERTSLICE12]], i64 [[TMP13]], i64 13 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE14:%.*]] = insertelement <16 x i64> [[LARGEPHI_INSERTSLICE13]], i64 [[TMP14]], i64 14 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE15:%.*]] = insertelement <16 x i64> [[LARGEPHI_INSERTSLICE14]], i64 [[TMP15]], i64 15 +; OPT-NEXT: store <16 x i64> [[LARGEPHI_INSERTSLICE15]], ptr [[OUT:%.*]], align 1 ; OPT-NEXT: ret void ; ; NOOPT-LABEL: @phi_v16i64( @@ -929,28 +929,28 @@ ; OPT-NEXT: br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]] ; OPT: then: ; OPT-NEXT: [[X:%.*]] = insertelement <7 x i16> [[IN:%.*]], i16 3, i32 3 -; OPT-NEXT: [[TMP0:%.*]] = shufflevector <7 x i16> [[X]], <7 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP1:%.*]] = shufflevector <7 x i16> [[X]], <7 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP2:%.*]] = shufflevector <7 x i16> [[X]], <7 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP3:%.*]] = extractelement <7 x i16> [[X]], i64 6 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE0:%.*]] = shufflevector <7 x i16> [[X]], <7 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = shufflevector <7 x i16> [[X]], <7 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = shufflevector <7 x i16> [[X]], <7 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE6:%.*]] = extractelement <7 x i16> [[X]], i64 6 ; OPT-NEXT: br label [[FINALLY:%.*]] ; OPT: else: ; OPT-NEXT: [[Y:%.*]] = insertelement <7 x i16> [[IN]], i16 9, i32 6 -; OPT-NEXT: [[TMP4:%.*]] = shufflevector <7 x i16> [[Y]], <7 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP5:%.*]] = shufflevector <7 x i16> [[Y]], <7 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP6:%.*]] = shufflevector <7 x i16> [[Y]], <7 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP7:%.*]] = extractelement <7 x i16> [[Y]], i64 6 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = shufflevector <7 x i16> [[Y]], <7 x i16> poison, <2 x 
i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = shufflevector <7 x i16> [[Y]], <7 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE5:%.*]] = shufflevector <7 x i16> [[Y]], <7 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE7:%.*]] = extractelement <7 x i16> [[Y]], i64 6 ; OPT-NEXT: br label [[FINALLY]] ; OPT: finally: -; OPT-NEXT: [[TMP8:%.*]] = phi <2 x i16> [ [[TMP0]], [[THEN]] ], [ [[TMP4]], [[ELSE]] ] -; OPT-NEXT: [[TMP9:%.*]] = phi <2 x i16> [ [[TMP1]], [[THEN]] ], [ [[TMP5]], [[ELSE]] ] -; OPT-NEXT: [[TMP10:%.*]] = phi <2 x i16> [ [[TMP2]], [[THEN]] ], [ [[TMP6]], [[ELSE]] ] -; OPT-NEXT: [[TMP11:%.*]] = phi i16 [ [[TMP3]], [[THEN]] ], [ [[TMP7]], [[ELSE]] ] -; OPT-NEXT: [[TMP12:%.*]] = call <7 x i16> @llvm.vector.insert.v7i16.v2i16(<7 x i16> poison, <2 x i16> [[TMP8]], i64 0) -; OPT-NEXT: [[TMP13:%.*]] = call <7 x i16> @llvm.vector.insert.v7i16.v2i16(<7 x i16> [[TMP12]], <2 x i16> [[TMP9]], i64 2) -; OPT-NEXT: [[TMP14:%.*]] = call <7 x i16> @llvm.vector.insert.v7i16.v2i16(<7 x i16> [[TMP13]], <2 x i16> [[TMP10]], i64 4) -; OPT-NEXT: [[TMP15:%.*]] = insertelement <7 x i16> [[TMP14]], i16 [[TMP11]], i64 6 -; OPT-NEXT: store <7 x i16> [[TMP15]], ptr [[OUT:%.*]], align 1 +; OPT-NEXT: [[TMP0:%.*]] = phi <2 x i16> [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE1]], [[ELSE]] ] +; OPT-NEXT: [[TMP1:%.*]] = phi <2 x i16> [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE3]], [[ELSE]] ] +; OPT-NEXT: [[TMP2:%.*]] = phi <2 x i16> [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE5]], [[ELSE]] ] +; OPT-NEXT: [[TMP3:%.*]] = phi i16 [ [[LARGEPHI_EXTRACTSLICE6]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE7]], [[ELSE]] ] +; OPT-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = call <7 x i16> @llvm.vector.insert.v7i16.v2i16(<7 x i16> poison, <2 x i16> [[TMP0]], i64 0) +; OPT-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = call <7 x i16> @llvm.vector.insert.v7i16.v2i16(<7 x i16> [[LARGEPHI_INSERTSLICE0]], <2 x i16> [[TMP1]], 
i64 2) +; OPT-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = call <7 x i16> @llvm.vector.insert.v7i16.v2i16(<7 x i16> [[LARGEPHI_INSERTSLICE1]], <2 x i16> [[TMP2]], i64 4) +; OPT-NEXT: [[LARGEPHI_INSERTSLICE3:%.*]] = insertelement <7 x i16> [[LARGEPHI_INSERTSLICE2]], i16 [[TMP3]], i64 6 +; OPT-NEXT: store <7 x i16> [[LARGEPHI_INSERTSLICE3]], ptr [[OUT:%.*]], align 1 ; OPT-NEXT: ret void ; ; NOOPT-LABEL: @phi_v7i16( @@ -991,34 +991,34 @@ ; OPT-NEXT: i8 3, label [[THEN_2:%.*]] ; OPT-NEXT: ] ; OPT: then.1: -; OPT-NEXT: [[TMP0:%.*]] = shufflevector <7 x i16> [[X]], <7 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP1:%.*]] = shufflevector <7 x i16> [[X]], <7 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP2:%.*]] = shufflevector <7 x i16> [[X]], <7 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP3:%.*]] = extractelement <7 x i16> [[X]], i64 6 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE0:%.*]] = shufflevector <7 x i16> [[X]], <7 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = shufflevector <7 x i16> [[X]], <7 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE6:%.*]] = shufflevector <7 x i16> [[X]], <7 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE9:%.*]] = extractelement <7 x i16> [[X]], i64 6 ; OPT-NEXT: br label [[FINALLY:%.*]] ; OPT: then.2: -; OPT-NEXT: [[TMP4:%.*]] = shufflevector <7 x i16> [[X]], <7 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP5:%.*]] = shufflevector <7 x i16> [[X]], <7 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP6:%.*]] = shufflevector <7 x i16> [[X]], <7 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP7:%.*]] = extractelement <7 x i16> [[X]], i64 6 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = shufflevector <7 x i16> [[X]], <7 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = shufflevector <7 x i16> [[X]], <7 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE7:%.*]] = shufflevector <7 x i16> [[X]], <7 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE10:%.*]] = extractelement <7 x i16> [[X]], 
i64 6 ; OPT-NEXT: br label [[FINALLY]] ; OPT: else: ; OPT-NEXT: [[Y:%.*]] = insertelement <7 x i16> [[IN]], i16 9, i32 6 -; OPT-NEXT: [[TMP8:%.*]] = shufflevector <7 x i16> [[Y]], <7 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP9:%.*]] = shufflevector <7 x i16> [[Y]], <7 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP10:%.*]] = shufflevector <7 x i16> [[Y]], <7 x i16> poison, <2 x i32> -; OPT-NEXT: [[TMP11:%.*]] = extractelement <7 x i16> [[Y]], i64 6 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = shufflevector <7 x i16> [[Y]], <7 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE5:%.*]] = shufflevector <7 x i16> [[Y]], <7 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE8:%.*]] = shufflevector <7 x i16> [[Y]], <7 x i16> poison, <2 x i32> +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE11:%.*]] = extractelement <7 x i16> [[Y]], i64 6 ; OPT-NEXT: br label [[FINALLY]] ; OPT: finally: -; OPT-NEXT: [[TMP12:%.*]] = phi <2 x i16> [ [[TMP0]], [[THEN_1]] ], [ [[TMP4]], [[THEN_2]] ], [ [[TMP8]], [[ELSE]] ] -; OPT-NEXT: [[TMP13:%.*]] = phi <2 x i16> [ [[TMP1]], [[THEN_1]] ], [ [[TMP5]], [[THEN_2]] ], [ [[TMP9]], [[ELSE]] ] -; OPT-NEXT: [[TMP14:%.*]] = phi <2 x i16> [ [[TMP2]], [[THEN_1]] ], [ [[TMP6]], [[THEN_2]] ], [ [[TMP10]], [[ELSE]] ] -; OPT-NEXT: [[TMP15:%.*]] = phi i16 [ [[TMP3]], [[THEN_1]] ], [ [[TMP7]], [[THEN_2]] ], [ [[TMP11]], [[ELSE]] ] -; OPT-NEXT: [[TMP16:%.*]] = call <7 x i16> @llvm.vector.insert.v7i16.v2i16(<7 x i16> poison, <2 x i16> [[TMP12]], i64 0) -; OPT-NEXT: [[TMP17:%.*]] = call <7 x i16> @llvm.vector.insert.v7i16.v2i16(<7 x i16> [[TMP16]], <2 x i16> [[TMP13]], i64 2) -; OPT-NEXT: [[TMP18:%.*]] = call <7 x i16> @llvm.vector.insert.v7i16.v2i16(<7 x i16> [[TMP17]], <2 x i16> [[TMP14]], i64 4) -; OPT-NEXT: [[TMP19:%.*]] = insertelement <7 x i16> [[TMP18]], i16 [[TMP15]], i64 6 -; OPT-NEXT: store <7 x i16> [[TMP19]], ptr [[OUT:%.*]], align 1 +; OPT-NEXT: [[TMP0:%.*]] = phi <2 x i16> [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN_1]] ], [ 
[[LARGEPHI_EXTRACTSLICE1]], [[THEN_2]] ], [ [[LARGEPHI_EXTRACTSLICE2]], [[ELSE]] ] +; OPT-NEXT: [[TMP1:%.*]] = phi <2 x i16> [ [[LARGEPHI_EXTRACTSLICE3]], [[THEN_1]] ], [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN_2]] ], [ [[LARGEPHI_EXTRACTSLICE5]], [[ELSE]] ] +; OPT-NEXT: [[TMP2:%.*]] = phi <2 x i16> [ [[LARGEPHI_EXTRACTSLICE6]], [[THEN_1]] ], [ [[LARGEPHI_EXTRACTSLICE7]], [[THEN_2]] ], [ [[LARGEPHI_EXTRACTSLICE8]], [[ELSE]] ] +; OPT-NEXT: [[TMP3:%.*]] = phi i16 [ [[LARGEPHI_EXTRACTSLICE9]], [[THEN_1]] ], [ [[LARGEPHI_EXTRACTSLICE10]], [[THEN_2]] ], [ [[LARGEPHI_EXTRACTSLICE11]], [[ELSE]] ] +; OPT-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = call <7 x i16> @llvm.vector.insert.v7i16.v2i16(<7 x i16> poison, <2 x i16> [[TMP0]], i64 0) +; OPT-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = call <7 x i16> @llvm.vector.insert.v7i16.v2i16(<7 x i16> [[LARGEPHI_INSERTSLICE0]], <2 x i16> [[TMP1]], i64 2) +; OPT-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = call <7 x i16> @llvm.vector.insert.v7i16.v2i16(<7 x i16> [[LARGEPHI_INSERTSLICE1]], <2 x i16> [[TMP2]], i64 4) +; OPT-NEXT: [[LARGEPHI_INSERTSLICE3:%.*]] = insertelement <7 x i16> [[LARGEPHI_INSERTSLICE2]], i16 [[TMP3]], i64 6 +; OPT-NEXT: store <7 x i16> [[LARGEPHI_INSERTSLICE3]], ptr [[OUT:%.*]], align 1 ; OPT-NEXT: ret void ; ; NOOPT-LABEL: @phi_v7i16_switch( @@ -1058,3 +1058,142 @@ store <7 x i16> %val, ptr %out, align 1 ret void } + +; Checks that we can deal with PHIs that have the same basic block/incoming value +; pair appear more than once in the incoming blocks. +; It's not illegal IR. However, if the pass lazily transforms all incoming values +; without checking for duplicates, it could create a PHI with the same basic block +; appearing multiple times, but with different incoming values, which is then illegal +; IR. +; The error was: +; PHI node has multiple entries for the same basic block with different incoming values! 
+define amdgpu_kernel void @multi_inc_same_bb(<5 x double> %in, ptr %out, i1 %cond) { +; OPT-LABEL: @multi_inc_same_bb( +; OPT-NEXT: entry: +; OPT-NEXT: br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]] +; OPT: then: +; OPT-NEXT: [[X:%.*]] = insertelement <5 x double> [[IN:%.*]], double 3.140000e+00, i32 3 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <5 x double> [[X]], i64 0 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[X]], i64 1 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE6:%.*]] = extractelement <5 x double> [[X]], i64 2 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE9:%.*]] = extractelement <5 x double> [[X]], i64 3 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE12:%.*]] = extractelement <5 x double> [[X]], i64 4 +; OPT-NEXT: br label [[FINALLY:%.*]] +; OPT: else: +; OPT-NEXT: [[Y:%.*]] = insertelement <5 x double> [[IN]], double 9.140000e+00, i32 2 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[Y]], i64 0 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[Y]], i64 1 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE7:%.*]] = extractelement <5 x double> [[Y]], i64 2 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE10:%.*]] = extractelement <5 x double> [[Y]], i64 3 +; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE13:%.*]] = extractelement <5 x double> [[Y]], i64 4 +; OPT-NEXT: br i1 [[COND]], label [[FINALLY]], label [[FINALLY]] +; OPT: finally: +; OPT-NEXT: [[TMP0:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE1]], [[ELSE]] ], [ [[LARGEPHI_EXTRACTSLICE1]], [[ELSE]] ] +; OPT-NEXT: [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE3]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE4]], [[ELSE]] ], [ [[LARGEPHI_EXTRACTSLICE4]], [[ELSE]] ] +; OPT-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE6]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE7]], [[ELSE]] ], [ [[LARGEPHI_EXTRACTSLICE7]], [[ELSE]] ] +; OPT-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE9]], [[THEN]] ], [ 
[[LARGEPHI_EXTRACTSLICE10]], [[ELSE]] ], [ [[LARGEPHI_EXTRACTSLICE10]], [[ELSE]] ] +; OPT-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE12]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE13]], [[ELSE]] ], [ [[LARGEPHI_EXTRACTSLICE13]], [[ELSE]] ] +; OPT-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <5 x double> poison, double [[TMP0]], i64 0 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE0]], double [[TMP1]], i64 1 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE1]], double [[TMP2]], i64 2 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE3:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE2]], double [[TMP3]], i64 3 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE4:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE3]], double [[TMP4]], i64 4 +; OPT-NEXT: store <5 x double> [[LARGEPHI_INSERTSLICE4]], ptr [[OUT:%.*]], align 1 +; OPT-NEXT: ret void +; +; NOOPT-LABEL: @multi_inc_same_bb( +; NOOPT-NEXT: entry: +; NOOPT-NEXT: br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]] +; NOOPT: then: +; NOOPT-NEXT: [[X:%.*]] = insertelement <5 x double> [[IN:%.*]], double 3.140000e+00, i32 3 +; NOOPT-NEXT: br label [[FINALLY:%.*]] +; NOOPT: else: +; NOOPT-NEXT: [[Y:%.*]] = insertelement <5 x double> [[IN]], double 9.140000e+00, i32 2 +; NOOPT-NEXT: br i1 [[COND]], label [[FINALLY]], label [[FINALLY]] +; NOOPT: finally: +; NOOPT-NEXT: [[VAL:%.*]] = phi <5 x double> [ [[X]], [[THEN]] ], [ [[Y]], [[ELSE]] ], [ [[Y]], [[ELSE]] ] +; NOOPT-NEXT: store <5 x double> [[VAL]], ptr [[OUT:%.*]], align 1 +; NOOPT-NEXT: ret void +; +entry: + br i1 %cond, label %then, label %else +then: + %x = insertelement <5 x double> %in, double 3.14, i32 3 + br label %finally +else: + %y = insertelement <5 x double> %in, double 9.14, i32 2 + br i1 %cond, label %finally, label %finally +finally: + %val = phi <5 x double> [%x, %then], [%y, %else], [%y, %else] + store <5 x double> %val, ptr %out, align 1 + ret void 
+}
+
+; Checks that we can deal with blocks that just have a PHI + terminator.
+define amdgpu_kernel void @minimal_block_with_only_phi(<5 x double> %in, ptr %out, i1 %cond) {
+; OPT-LABEL: @minimal_block_with_only_phi(
+; OPT-NEXT: entry:
+; OPT-NEXT: br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; OPT: then:
+; OPT-NEXT: [[X:%.*]] = insertelement <5 x double> [[IN:%.*]], double 3.140000e+00, i32 3
+; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <5 x double> [[X]], i64 0
+; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[X]], i64 1
+; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE6:%.*]] = extractelement <5 x double> [[X]], i64 2
+; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE9:%.*]] = extractelement <5 x double> [[X]], i64 3
+; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE12:%.*]] = extractelement <5 x double> [[X]], i64 4
+; OPT-NEXT: br label [[FINALLY:%.*]]
+; OPT: else:
+; OPT-NEXT: [[Y:%.*]] = insertelement <5 x double> [[IN]], double 9.140000e+00, i32 2
+; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[Y]], i64 0
+; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[Y]], i64 1
+; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE7:%.*]] = extractelement <5 x double> [[Y]], i64 2
+; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE10:%.*]] = extractelement <5 x double> [[Y]], i64 3
+; OPT-NEXT: [[LARGEPHI_EXTRACTSLICE13:%.*]] = extractelement <5 x double> [[Y]], i64 4
+; OPT-NEXT: br i1 [[COND]], label [[FINALLY]], label [[FINALLY]]
+; OPT: finally:
+; OPT-NEXT: [[TMP0:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE1]], [[ELSE]] ], [ [[LARGEPHI_EXTRACTSLICE1]], [[ELSE]] ]
+; OPT-NEXT: [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE3]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE4]], [[ELSE]] ], [ [[LARGEPHI_EXTRACTSLICE4]], [[ELSE]] ]
+; OPT-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE6]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE7]], [[ELSE]] ], [ [[LARGEPHI_EXTRACTSLICE7]], 
[[ELSE]] ] +; OPT-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE9]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE10]], [[ELSE]] ], [ [[LARGEPHI_EXTRACTSLICE10]], [[ELSE]] ] +; OPT-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE12]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE13]], [[ELSE]] ], [ [[LARGEPHI_EXTRACTSLICE13]], [[ELSE]] ] +; OPT-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <5 x double> poison, double [[TMP0]], i64 0 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE0]], double [[TMP1]], i64 1 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE1]], double [[TMP2]], i64 2 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE3:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE2]], double [[TMP3]], i64 3 +; OPT-NEXT: [[LARGEPHI_INSERTSLICE4:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE3]], double [[TMP4]], i64 4 +; OPT-NEXT: br label [[REALLYFINALLY:%.*]] +; OPT: reallyfinally: +; OPT-NEXT: store <5 x double> [[LARGEPHI_INSERTSLICE4]], ptr [[OUT:%.*]], align 1 +; OPT-NEXT: ret void +; +; NOOPT-LABEL: @minimal_block_with_only_phi( +; NOOPT-NEXT: entry: +; NOOPT-NEXT: br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]] +; NOOPT: then: +; NOOPT-NEXT: [[X:%.*]] = insertelement <5 x double> [[IN:%.*]], double 3.140000e+00, i32 3 +; NOOPT-NEXT: br label [[FINALLY:%.*]] +; NOOPT: else: +; NOOPT-NEXT: [[Y:%.*]] = insertelement <5 x double> [[IN]], double 9.140000e+00, i32 2 +; NOOPT-NEXT: br i1 [[COND]], label [[FINALLY]], label [[FINALLY]] +; NOOPT: finally: +; NOOPT-NEXT: [[VAL:%.*]] = phi <5 x double> [ [[X]], [[THEN]] ], [ [[Y]], [[ELSE]] ], [ [[Y]], [[ELSE]] ] +; NOOPT-NEXT: br label [[REALLYFINALLY:%.*]] +; NOOPT: reallyfinally: +; NOOPT-NEXT: store <5 x double> [[VAL]], ptr [[OUT:%.*]], align 1 +; NOOPT-NEXT: ret void +; +entry: + br i1 %cond, label %then, label %else +then: + %x = insertelement <5 x double> %in, double 3.14, i32 3 + br label 
%finally +else: + %y = insertelement <5 x double> %in, double 9.14, i32 2 + br i1 %cond, label %finally, label %finally +finally: + %val = phi <5 x double> [%x, %then], [%y, %else], [%y, %else] + br label %reallyfinally +reallyfinally: + store <5 x double> %val, ptr %out, align 1 + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll --- a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll @@ -9,6 +9,12 @@ declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) #0 declare <3 x half> @llvm.canonicalize.v3f16(<3 x half>) #0 declare <4 x half> @llvm.canonicalize.v4f16(<4 x half>) #0 +declare <6 x half> @llvm.canonicalize.v6f16(<6 x half>) #0 +declare <8 x half> @llvm.canonicalize.v8f16(<8 x half>) #0 +declare <12 x half> @llvm.canonicalize.v12f16(<12 x half>) #0 +declare <16 x half> @llvm.canonicalize.v16f16(<16 x half>) #0 +declare <32 x half> @llvm.canonicalize.v32f16(<32 x half>) #0 +declare <64 x half> @llvm.canonicalize.v64f16(<64 x half>) #0 declare i32 @llvm.amdgcn.workitem.id.x() #0 define amdgpu_kernel void @test_fold_canonicalize_undef_value_f16(ptr addrspace(1) %out) #1 { @@ -2227,6 +2233,807 @@ ret <4 x half> %canonicalized } +define <6 x half> @v_test_canonicalize_var_v6f16(<6 x half> %val) #1 { +; VI-LABEL: v_test_canonicalize_var_v6f16: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_max_f16_sdwa v3, v2, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_sdwa v4, v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_sdwa v5, v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v2, v2, v2 +; VI-NEXT: v_max_f16_e32 v1, v1, v1 +; VI-NEXT: v_max_f16_e32 v0, v0, v0 +; VI-NEXT: v_or_b32_e32 v0, v0, v5 +; VI-NEXT: v_or_b32_e32 v1, v1, v4 +; VI-NEXT: v_or_b32_e32 v2, v2, v3 +; 
VI-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_canonicalize_var_v6f16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX9-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX9-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; CI-LABEL: v_test_canonicalize_var_v6f16: +; CI: ; %bb.0: +; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 +; CI-NEXT: v_cvt_f16_f32_e32 v5, v5 +; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; CI-NEXT: s_setpc_b64 s[30:31] + %canonicalized = call <6 x half> @llvm.canonicalize.v6f16(<6 x half> %val) + ret <6 x half> %canonicalized +} + +define <8 x half> @v_test_canonicalize_var_v8f16(<8 x half> %val) #1 { +; VI-LABEL: v_test_canonicalize_var_v8f16: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_max_f16_sdwa v4, v3, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_sdwa v5, v2, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_sdwa v6, v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_sdwa v7, v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v3, v3, v3 +; VI-NEXT: v_max_f16_e32 v2, v2, v2 +; VI-NEXT: v_max_f16_e32 v1, v1, v1 +; VI-NEXT: v_max_f16_e32 v0, v0, v0 +; VI-NEXT: v_or_b32_e32 v0, v0, v7 +; VI-NEXT: v_or_b32_e32 v1, v1, v6 +; VI-NEXT: v_or_b32_e32 v2, v2, v5 +; VI-NEXT: v_or_b32_e32 v3, v3, v4 +; VI-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_canonicalize_var_v8f16: +; 
GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX9-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX9-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX9-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; CI-LABEL: v_test_canonicalize_var_v8f16: +; CI: ; %bb.0: +; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 +; CI-NEXT: v_cvt_f16_f32_e32 v5, v5 +; CI-NEXT: v_cvt_f16_f32_e32 v6, v6 +; CI-NEXT: v_cvt_f16_f32_e32 v7, v7 +; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; CI-NEXT: v_cvt_f32_f16_e32 v6, v6 +; CI-NEXT: v_cvt_f32_f16_e32 v7, v7 +; CI-NEXT: s_setpc_b64 s[30:31] + %canonicalized = call <8 x half> @llvm.canonicalize.v8f16(<8 x half> %val) + ret <8 x half> %canonicalized +} + +define <12 x half> @v_test_canonicalize_var_v12f16(<12 x half> %val) #1 { +; VI-LABEL: v_test_canonicalize_var_v12f16: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_max_f16_sdwa v6, v5, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_sdwa v7, v4, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_sdwa v8, v3, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_sdwa v9, v2, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_sdwa v10, v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_sdwa v11, v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v5, v5, v5 +; VI-NEXT: 
v_max_f16_e32 v4, v4, v4 +; VI-NEXT: v_max_f16_e32 v3, v3, v3 +; VI-NEXT: v_max_f16_e32 v2, v2, v2 +; VI-NEXT: v_max_f16_e32 v1, v1, v1 +; VI-NEXT: v_max_f16_e32 v0, v0, v0 +; VI-NEXT: v_or_b32_e32 v0, v0, v11 +; VI-NEXT: v_or_b32_e32 v1, v1, v10 +; VI-NEXT: v_or_b32_e32 v2, v2, v9 +; VI-NEXT: v_or_b32_e32 v3, v3, v8 +; VI-NEXT: v_or_b32_e32 v4, v4, v7 +; VI-NEXT: v_or_b32_e32 v5, v5, v6 +; VI-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_canonicalize_var_v12f16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX9-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX9-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX9-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX9-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX9-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; CI-LABEL: v_test_canonicalize_var_v12f16: +; CI: ; %bb.0: +; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 +; CI-NEXT: v_cvt_f16_f32_e32 v5, v5 +; CI-NEXT: v_cvt_f16_f32_e32 v6, v6 +; CI-NEXT: v_cvt_f16_f32_e32 v7, v7 +; CI-NEXT: v_cvt_f16_f32_e32 v8, v8 +; CI-NEXT: v_cvt_f16_f32_e32 v9, v9 +; CI-NEXT: v_cvt_f16_f32_e32 v10, v10 +; CI-NEXT: v_cvt_f16_f32_e32 v11, v11 +; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; CI-NEXT: v_cvt_f32_f16_e32 v6, v6 +; CI-NEXT: v_cvt_f32_f16_e32 v7, v7 +; CI-NEXT: v_cvt_f32_f16_e32 v8, v8 +; CI-NEXT: v_cvt_f32_f16_e32 v9, v9 +; CI-NEXT: v_cvt_f32_f16_e32 v10, v10 +; CI-NEXT: v_cvt_f32_f16_e32 v11, v11 +; CI-NEXT: s_setpc_b64 s[30:31] + %canonicalized = call <12 x half> @llvm.canonicalize.v12f16(<12 x half> %val) + ret <12 x half> %canonicalized +} + +define <16 x half> 
@v_test_canonicalize_var_v16f16(<16 x half> %val) #1 { +; VI-LABEL: v_test_canonicalize_var_v16f16: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_max_f16_sdwa v8, v7, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_sdwa v9, v6, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_sdwa v10, v5, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_sdwa v11, v4, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_sdwa v12, v3, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_sdwa v13, v2, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_sdwa v14, v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_sdwa v15, v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v7, v7, v7 +; VI-NEXT: v_max_f16_e32 v6, v6, v6 +; VI-NEXT: v_max_f16_e32 v5, v5, v5 +; VI-NEXT: v_max_f16_e32 v4, v4, v4 +; VI-NEXT: v_max_f16_e32 v3, v3, v3 +; VI-NEXT: v_max_f16_e32 v2, v2, v2 +; VI-NEXT: v_max_f16_e32 v1, v1, v1 +; VI-NEXT: v_max_f16_e32 v0, v0, v0 +; VI-NEXT: v_or_b32_e32 v0, v0, v15 +; VI-NEXT: v_or_b32_e32 v1, v1, v14 +; VI-NEXT: v_or_b32_e32 v2, v2, v13 +; VI-NEXT: v_or_b32_e32 v3, v3, v12 +; VI-NEXT: v_or_b32_e32 v4, v4, v11 +; VI-NEXT: v_or_b32_e32 v5, v5, v10 +; VI-NEXT: v_or_b32_e32 v6, v6, v9 +; VI-NEXT: v_or_b32_e32 v7, v7, v8 +; VI-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_canonicalize_var_v16f16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX9-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX9-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX9-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX9-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX9-NEXT: v_pk_max_f16 v5, v5, 
v5 +; GFX9-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX9-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; CI-LABEL: v_test_canonicalize_var_v16f16: +; CI: ; %bb.0: +; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 +; CI-NEXT: v_cvt_f16_f32_e32 v5, v5 +; CI-NEXT: v_cvt_f16_f32_e32 v6, v6 +; CI-NEXT: v_cvt_f16_f32_e32 v7, v7 +; CI-NEXT: v_cvt_f16_f32_e32 v8, v8 +; CI-NEXT: v_cvt_f16_f32_e32 v9, v9 +; CI-NEXT: v_cvt_f16_f32_e32 v10, v10 +; CI-NEXT: v_cvt_f16_f32_e32 v11, v11 +; CI-NEXT: v_cvt_f16_f32_e32 v12, v12 +; CI-NEXT: v_cvt_f16_f32_e32 v13, v13 +; CI-NEXT: v_cvt_f16_f32_e32 v14, v14 +; CI-NEXT: v_cvt_f16_f32_e32 v15, v15 +; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; CI-NEXT: v_cvt_f32_f16_e32 v6, v6 +; CI-NEXT: v_cvt_f32_f16_e32 v7, v7 +; CI-NEXT: v_cvt_f32_f16_e32 v8, v8 +; CI-NEXT: v_cvt_f32_f16_e32 v9, v9 +; CI-NEXT: v_cvt_f32_f16_e32 v10, v10 +; CI-NEXT: v_cvt_f32_f16_e32 v11, v11 +; CI-NEXT: v_cvt_f32_f16_e32 v12, v12 +; CI-NEXT: v_cvt_f32_f16_e32 v13, v13 +; CI-NEXT: v_cvt_f32_f16_e32 v14, v14 +; CI-NEXT: v_cvt_f32_f16_e32 v15, v15 +; CI-NEXT: s_setpc_b64 s[30:31] + %canonicalized = call <16 x half> @llvm.canonicalize.v16f16(<16 x half> %val) + ret <16 x half> %canonicalized +} + +define <32 x half> @v_test_canonicalize_var_v32f16(<32 x half> %val) #1 { +; VI-LABEL: v_test_canonicalize_var_v32f16: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_max_f16_sdwa v20, v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v0, v0, v0 +; VI-NEXT: v_or_b32_e32 v0, v0, v20 +; VI-NEXT: v_max_f16_sdwa v20, v1, v1 
dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v1, v1, v1 +; VI-NEXT: v_or_b32_e32 v1, v1, v20 +; VI-NEXT: v_max_f16_sdwa v20, v2, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v2, v2, v2 +; VI-NEXT: v_or_b32_e32 v2, v2, v20 +; VI-NEXT: v_max_f16_sdwa v20, v3, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v3, v3, v3 +; VI-NEXT: v_or_b32_e32 v3, v3, v20 +; VI-NEXT: v_max_f16_sdwa v20, v4, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v4, v4, v4 +; VI-NEXT: v_or_b32_e32 v4, v4, v20 +; VI-NEXT: v_max_f16_sdwa v20, v5, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v5, v5, v5 +; VI-NEXT: v_or_b32_e32 v5, v5, v20 +; VI-NEXT: v_max_f16_sdwa v20, v6, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v6, v6, v6 +; VI-NEXT: v_or_b32_e32 v6, v6, v20 +; VI-NEXT: v_max_f16_sdwa v20, v7, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v7, v7, v7 +; VI-NEXT: v_or_b32_e32 v7, v7, v20 +; VI-NEXT: v_max_f16_sdwa v20, v8, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v8, v8, v8 +; VI-NEXT: v_or_b32_e32 v8, v8, v20 +; VI-NEXT: v_max_f16_sdwa v20, v9, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v9, v9, v9 +; VI-NEXT: v_or_b32_e32 v9, v9, v20 +; VI-NEXT: v_max_f16_sdwa v20, v10, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v10, v10, v10 +; VI-NEXT: v_max_f16_sdwa v16, v15, v15 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_sdwa v17, v14, v14 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_sdwa 
v18, v13, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_sdwa v19, v12, v12 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_or_b32_e32 v10, v10, v20 +; VI-NEXT: v_max_f16_sdwa v20, v11, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v15, v15, v15 +; VI-NEXT: v_max_f16_e32 v14, v14, v14 +; VI-NEXT: v_max_f16_e32 v13, v13, v13 +; VI-NEXT: v_max_f16_e32 v12, v12, v12 +; VI-NEXT: v_max_f16_e32 v11, v11, v11 +; VI-NEXT: v_or_b32_e32 v11, v11, v20 +; VI-NEXT: v_or_b32_e32 v12, v12, v19 +; VI-NEXT: v_or_b32_e32 v13, v13, v18 +; VI-NEXT: v_or_b32_e32 v14, v14, v17 +; VI-NEXT: v_or_b32_e32 v15, v15, v16 +; VI-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_canonicalize_var_v32f16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX9-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX9-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX9-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX9-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX9-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX9-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX9-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX9-NEXT: v_pk_max_f16 v8, v8, v8 +; GFX9-NEXT: v_pk_max_f16 v9, v9, v9 +; GFX9-NEXT: v_pk_max_f16 v10, v10, v10 +; GFX9-NEXT: v_pk_max_f16 v11, v11, v11 +; GFX9-NEXT: v_pk_max_f16 v12, v12, v12 +; GFX9-NEXT: v_pk_max_f16 v13, v13, v13 +; GFX9-NEXT: v_pk_max_f16 v14, v14, v14 +; GFX9-NEXT: v_pk_max_f16 v15, v15, v15 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; CI-LABEL: v_test_canonicalize_var_v32f16: +; CI: ; %bb.0: +; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32 +; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 +; CI-NEXT: v_cvt_f16_f32_e32 v5, v5 +; CI-NEXT: v_cvt_f16_f32_e32 v6, v6 +; CI-NEXT: 
v_cvt_f16_f32_e32 v7, v7 +; CI-NEXT: v_cvt_f16_f32_e32 v8, v8 +; CI-NEXT: v_cvt_f16_f32_e32 v9, v9 +; CI-NEXT: v_cvt_f16_f32_e32 v10, v10 +; CI-NEXT: v_cvt_f16_f32_e32 v11, v11 +; CI-NEXT: v_cvt_f16_f32_e32 v12, v12 +; CI-NEXT: v_cvt_f16_f32_e32 v13, v13 +; CI-NEXT: v_cvt_f16_f32_e32 v14, v14 +; CI-NEXT: v_cvt_f16_f32_e32 v15, v15 +; CI-NEXT: v_cvt_f16_f32_e32 v16, v16 +; CI-NEXT: v_cvt_f16_f32_e32 v17, v17 +; CI-NEXT: v_cvt_f16_f32_e32 v18, v18 +; CI-NEXT: v_cvt_f16_f32_e32 v19, v19 +; CI-NEXT: v_cvt_f16_f32_e32 v20, v20 +; CI-NEXT: v_cvt_f16_f32_e32 v21, v21 +; CI-NEXT: v_cvt_f16_f32_e32 v22, v22 +; CI-NEXT: v_cvt_f16_f32_e32 v23, v23 +; CI-NEXT: v_cvt_f16_f32_e32 v24, v24 +; CI-NEXT: v_cvt_f16_f32_e32 v25, v25 +; CI-NEXT: v_cvt_f16_f32_e32 v26, v26 +; CI-NEXT: v_cvt_f16_f32_e32 v27, v27 +; CI-NEXT: v_cvt_f16_f32_e32 v28, v28 +; CI-NEXT: v_cvt_f16_f32_e32 v29, v29 +; CI-NEXT: v_cvt_f16_f32_e32 v30, v30 +; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; CI-NEXT: v_cvt_f32_f16_e32 v6, v6 +; CI-NEXT: v_cvt_f32_f16_e32 v7, v7 +; CI-NEXT: v_cvt_f32_f16_e32 v8, v8 +; CI-NEXT: v_cvt_f32_f16_e32 v9, v9 +; CI-NEXT: v_cvt_f32_f16_e32 v10, v10 +; CI-NEXT: v_cvt_f32_f16_e32 v11, v11 +; CI-NEXT: v_cvt_f32_f16_e32 v12, v12 +; CI-NEXT: v_cvt_f32_f16_e32 v13, v13 +; CI-NEXT: v_cvt_f32_f16_e32 v14, v14 +; CI-NEXT: v_cvt_f32_f16_e32 v15, v15 +; CI-NEXT: v_cvt_f32_f16_e32 v16, v16 +; CI-NEXT: v_cvt_f32_f16_e32 v17, v17 +; CI-NEXT: v_cvt_f32_f16_e32 v18, v18 +; CI-NEXT: v_cvt_f32_f16_e32 v19, v19 +; CI-NEXT: v_cvt_f32_f16_e32 v20, v20 +; CI-NEXT: v_cvt_f32_f16_e32 v21, v21 +; CI-NEXT: v_cvt_f32_f16_e32 v22, v22 +; CI-NEXT: v_cvt_f32_f16_e32 v23, v23 +; CI-NEXT: v_cvt_f32_f16_e32 v24, v24 +; CI-NEXT: v_cvt_f32_f16_e32 v25, v25 +; CI-NEXT: v_cvt_f32_f16_e32 v26, v26 +; CI-NEXT: v_cvt_f32_f16_e32 v27, v27 +; 
CI-NEXT: v_cvt_f32_f16_e32 v28, v28 +; CI-NEXT: v_cvt_f32_f16_e32 v29, v29 +; CI-NEXT: v_cvt_f32_f16_e32 v30, v30 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: v_cvt_f16_f32_e32 v31, v31 +; CI-NEXT: v_cvt_f32_f16_e32 v31, v31 +; CI-NEXT: s_setpc_b64 s[30:31] + %canonicalized = call <32 x half> @llvm.canonicalize.v32f16(<32 x half> %val) + ret <32 x half> %canonicalized +} + +define <64 x half> @v_test_canonicalize_var_v64f16(<64 x half> %val) #1 { +; VI-LABEL: v_test_canonicalize_var_v64f16: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_max_f16_sdwa v31, v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v0, v0, v0 +; VI-NEXT: v_or_b32_e32 v0, v0, v31 +; VI-NEXT: v_max_f16_sdwa v31, v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v1, v1, v1 +; VI-NEXT: v_or_b32_e32 v1, v1, v31 +; VI-NEXT: v_max_f16_sdwa v31, v2, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v2, v2, v2 +; VI-NEXT: v_or_b32_e32 v2, v2, v31 +; VI-NEXT: v_max_f16_sdwa v31, v3, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v3, v3, v3 +; VI-NEXT: v_or_b32_e32 v3, v3, v31 +; VI-NEXT: v_max_f16_sdwa v31, v4, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v4, v4, v4 +; VI-NEXT: v_or_b32_e32 v4, v4, v31 +; VI-NEXT: v_max_f16_sdwa v31, v5, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v5, v5, v5 +; VI-NEXT: v_or_b32_e32 v5, v5, v31 +; VI-NEXT: v_max_f16_sdwa v31, v6, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v6, v6, v6 +; VI-NEXT: v_or_b32_e32 v6, v6, v31 +; VI-NEXT: v_max_f16_sdwa v31, v7, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v7, v7, v7 +; VI-NEXT: 
v_or_b32_e32 v7, v7, v31 +; VI-NEXT: v_max_f16_sdwa v31, v8, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v8, v8, v8 +; VI-NEXT: v_or_b32_e32 v8, v8, v31 +; VI-NEXT: v_max_f16_sdwa v31, v9, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v9, v9, v9 +; VI-NEXT: v_or_b32_e32 v9, v9, v31 +; VI-NEXT: v_max_f16_sdwa v31, v10, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v10, v10, v10 +; VI-NEXT: v_or_b32_e32 v10, v10, v31 +; VI-NEXT: v_max_f16_sdwa v31, v11, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v11, v11, v11 +; VI-NEXT: v_or_b32_e32 v11, v11, v31 +; VI-NEXT: v_max_f16_sdwa v31, v12, v12 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v12, v12, v12 +; VI-NEXT: v_or_b32_e32 v12, v12, v31 +; VI-NEXT: v_max_f16_sdwa v31, v13, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v13, v13, v13 +; VI-NEXT: v_or_b32_e32 v13, v13, v31 +; VI-NEXT: v_max_f16_sdwa v31, v14, v14 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v14, v14, v14 +; VI-NEXT: v_or_b32_e32 v14, v14, v31 +; VI-NEXT: v_max_f16_sdwa v31, v15, v15 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v15, v15, v15 +; VI-NEXT: v_or_b32_e32 v15, v15, v31 +; VI-NEXT: v_max_f16_sdwa v31, v16, v16 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v16, v16, v16 +; VI-NEXT: v_or_b32_e32 v16, v16, v31 +; VI-NEXT: v_max_f16_sdwa v31, v17, v17 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v17, v17, v17 +; VI-NEXT: v_or_b32_e32 v17, v17, v31 +; VI-NEXT: v_max_f16_sdwa v31, v18, v18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 
src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v18, v18, v18 +; VI-NEXT: v_or_b32_e32 v18, v18, v31 +; VI-NEXT: v_max_f16_sdwa v31, v19, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v19, v19, v19 +; VI-NEXT: v_or_b32_e32 v19, v19, v31 +; VI-NEXT: v_max_f16_sdwa v31, v20, v20 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v20, v20, v20 +; VI-NEXT: v_or_b32_e32 v20, v20, v31 +; VI-NEXT: v_max_f16_sdwa v31, v21, v21 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v21, v21, v21 +; VI-NEXT: v_or_b32_e32 v21, v21, v31 +; VI-NEXT: v_max_f16_sdwa v31, v22, v22 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v22, v22, v22 +; VI-NEXT: v_or_b32_e32 v22, v22, v31 +; VI-NEXT: v_max_f16_sdwa v31, v23, v23 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v23, v23, v23 +; VI-NEXT: v_or_b32_e32 v23, v23, v31 +; VI-NEXT: v_max_f16_sdwa v31, v24, v24 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v24, v24, v24 +; VI-NEXT: v_or_b32_e32 v24, v24, v31 +; VI-NEXT: v_max_f16_sdwa v31, v25, v25 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v25, v25, v25 +; VI-NEXT: v_or_b32_e32 v25, v25, v31 +; VI-NEXT: v_max_f16_sdwa v31, v26, v26 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v26, v26, v26 +; VI-NEXT: v_or_b32_e32 v26, v26, v31 +; VI-NEXT: v_max_f16_sdwa v31, v27, v27 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v27, v27, v27 +; VI-NEXT: v_or_b32_e32 v27, v27, v31 +; VI-NEXT: v_max_f16_sdwa v31, v28, v28 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v28, v28, v28 +; VI-NEXT: v_or_b32_e32 v28, v28, v31 +; VI-NEXT: 
v_max_f16_sdwa v31, v29, v29 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v29, v29, v29 +; VI-NEXT: v_or_b32_e32 v29, v29, v31 +; VI-NEXT: v_max_f16_sdwa v31, v30, v30 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v30, v30, v30 +; VI-NEXT: v_or_b32_e32 v30, v30, v31 +; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: v_max_f16_sdwa v32, v31, v31 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_max_f16_e32 v31, v31, v31 +; VI-NEXT: v_or_b32_e32 v31, v31, v32 +; VI-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_canonicalize_var_v64f16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 +; GFX9-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX9-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX9-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX9-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX9-NEXT: v_pk_max_f16 v4, v4, v4 +; GFX9-NEXT: v_pk_max_f16 v5, v5, v5 +; GFX9-NEXT: v_pk_max_f16 v6, v6, v6 +; GFX9-NEXT: v_pk_max_f16 v7, v7, v7 +; GFX9-NEXT: v_pk_max_f16 v8, v8, v8 +; GFX9-NEXT: v_pk_max_f16 v9, v9, v9 +; GFX9-NEXT: v_pk_max_f16 v10, v10, v10 +; GFX9-NEXT: v_pk_max_f16 v11, v11, v11 +; GFX9-NEXT: v_pk_max_f16 v12, v12, v12 +; GFX9-NEXT: v_pk_max_f16 v13, v13, v13 +; GFX9-NEXT: v_pk_max_f16 v14, v14, v14 +; GFX9-NEXT: v_pk_max_f16 v15, v15, v15 +; GFX9-NEXT: v_pk_max_f16 v16, v16, v16 +; GFX9-NEXT: v_pk_max_f16 v17, v17, v17 +; GFX9-NEXT: v_pk_max_f16 v18, v18, v18 +; GFX9-NEXT: v_pk_max_f16 v19, v19, v19 +; GFX9-NEXT: v_pk_max_f16 v20, v20, v20 +; GFX9-NEXT: v_pk_max_f16 v21, v21, v21 +; GFX9-NEXT: v_pk_max_f16 v22, v22, v22 +; GFX9-NEXT: v_pk_max_f16 v23, v23, v23 +; GFX9-NEXT: v_pk_max_f16 v24, v24, v24 +; GFX9-NEXT: v_pk_max_f16 v25, v25, v25 +; GFX9-NEXT: v_pk_max_f16 v26, v26, v26 +; GFX9-NEXT: v_pk_max_f16 v27, v27, v27 +; GFX9-NEXT: v_pk_max_f16 v28, 
v28, v28 +; GFX9-NEXT: v_pk_max_f16 v29, v29, v29 +; GFX9-NEXT: v_pk_max_f16 v30, v30, v30 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_pk_max_f16 v31, v31, v31 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; CI-LABEL: v_test_canonicalize_var_v64f16: +; CI: ; %bb.0: +; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; CI-NEXT: v_or_b32_e32 v1, v1, v2 +; CI-NEXT: v_cvt_f16_f32_e32 v2, v4 +; CI-NEXT: v_cvt_f16_f32_e32 v4, v5 +; CI-NEXT: v_cvt_f16_f32_e32 v5, v7 +; CI-NEXT: v_cvt_f16_f32_e32 v7, v9 +; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; CI-NEXT: v_or_b32_e32 v2, v3, v2 +; CI-NEXT: v_cvt_f16_f32_e32 v3, v6 +; CI-NEXT: v_cvt_f16_f32_e32 v6, v10 +; CI-NEXT: v_cvt_f16_f32_e32 v9, v13 +; CI-NEXT: v_cvt_f16_f32_e32 v10, v18 +; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; CI-NEXT: v_or_b32_e32 v3, v4, v3 +; CI-NEXT: v_cvt_f16_f32_e32 v4, v8 +; CI-NEXT: v_cvt_f16_f32_e32 v8, v14 +; CI-NEXT: v_cvt_f16_f32_e32 v13, v21 +; CI-NEXT: v_cvt_f16_f32_e32 v14, v26 +; CI-NEXT: v_lshlrev_b32_e32 v4, 16, v4 +; CI-NEXT: v_or_b32_e32 v4, v5, v4 +; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v6 +; CI-NEXT: v_cvt_f16_f32_e32 v6, v12 +; CI-NEXT: v_or_b32_e32 v5, v7, v5 +; CI-NEXT: v_cvt_f16_f32_e32 v7, v11 +; CI-NEXT: v_cvt_f16_f32_e32 v11, v17 +; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v6 +; CI-NEXT: v_cvt_f16_f32_e32 v12, v22 +; CI-NEXT: v_or_b32_e32 v6, v7, v6 +; CI-NEXT: v_lshlrev_b32_e32 v7, 16, v8 +; CI-NEXT: v_cvt_f16_f32_e32 v8, v16 +; CI-NEXT: v_or_b32_e32 v7, v9, v7 +; CI-NEXT: v_cvt_f16_f32_e32 v9, v15 +; CI-NEXT: v_cvt_f16_f32_e32 v15, v25 +; CI-NEXT: v_lshlrev_b32_e32 v8, 16, v8 +; CI-NEXT: v_cvt_f16_f32_e32 v25, v29 +; CI-NEXT: v_or_b32_e32 v8, v9, v8 +; CI-NEXT: v_lshlrev_b32_e32 v9, 16, v10 +; CI-NEXT: v_cvt_f16_f32_e32 v10, v20 +; CI-NEXT: v_or_b32_e32 v9, v11, v9 +; CI-NEXT: v_cvt_f16_f32_e32 v11, v19 +; CI-NEXT: buffer_load_dword v16, 
off, s[0:3], s32 offset:4 +; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 +; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12 +; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:8 +; CI-NEXT: v_lshlrev_b32_e32 v10, 16, v10 +; CI-NEXT: v_or_b32_e32 v10, v11, v10 +; CI-NEXT: v_lshlrev_b32_e32 v11, 16, v12 +; CI-NEXT: v_cvt_f16_f32_e32 v12, v24 +; CI-NEXT: v_or_b32_e32 v11, v13, v11 +; CI-NEXT: v_cvt_f16_f32_e32 v13, v23 +; CI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:20 +; CI-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:16 +; CI-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:28 +; CI-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:24 +; CI-NEXT: v_lshlrev_b32_e32 v12, 16, v12 +; CI-NEXT: v_cvt_f16_f32_e32 v24, v30 +; CI-NEXT: v_or_b32_e32 v12, v13, v12 +; CI-NEXT: v_lshlrev_b32_e32 v13, 16, v14 +; CI-NEXT: v_or_b32_e32 v13, v15, v13 +; CI-NEXT: v_cvt_f16_f32_e32 v14, v28 +; CI-NEXT: v_cvt_f16_f32_e32 v15, v27 +; CI-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:36 +; CI-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:32 +; CI-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:44 +; CI-NEXT: buffer_load_dword v29, off, s[0:3], s32 offset:40 +; CI-NEXT: v_lshlrev_b32_e32 v14, 16, v14 +; CI-NEXT: v_or_b32_e32 v14, v15, v14 +; CI-NEXT: v_lshlrev_b32_e32 v15, 16, v24 +; CI-NEXT: v_or_b32_e32 v15, v25, v15 +; CI-NEXT: s_waitcnt vmcnt(11) +; CI-NEXT: v_cvt_f16_f32_e32 v16, v16 +; CI-NEXT: s_waitcnt vmcnt(10) +; CI-NEXT: v_cvt_f16_f32_e32 v17, v17 +; CI-NEXT: s_waitcnt vmcnt(9) +; CI-NEXT: v_cvt_f16_f32_e32 v18, v18 +; CI-NEXT: s_waitcnt vmcnt(8) +; CI-NEXT: v_cvt_f16_f32_e32 v19, v19 +; CI-NEXT: v_lshlrev_b32_e32 v16, 16, v16 +; CI-NEXT: v_or_b32_e32 v16, v17, v16 +; CI-NEXT: v_lshlrev_b32_e32 v17, 16, v18 +; CI-NEXT: v_or_b32_e32 v17, v19, v17 +; CI-NEXT: s_waitcnt vmcnt(7) +; CI-NEXT: v_cvt_f16_f32_e32 v18, v20 +; CI-NEXT: s_waitcnt vmcnt(6) +; CI-NEXT: v_cvt_f16_f32_e32 v19, v21 +; CI-NEXT: s_waitcnt 
vmcnt(5) +; CI-NEXT: v_cvt_f16_f32_e32 v20, v22 +; CI-NEXT: s_waitcnt vmcnt(4) +; CI-NEXT: v_cvt_f16_f32_e32 v21, v23 +; CI-NEXT: v_lshlrev_b32_e32 v18, 16, v18 +; CI-NEXT: v_or_b32_e32 v18, v19, v18 +; CI-NEXT: v_lshlrev_b32_e32 v19, 16, v20 +; CI-NEXT: v_or_b32_e32 v19, v21, v19 +; CI-NEXT: s_waitcnt vmcnt(3) +; CI-NEXT: v_cvt_f16_f32_e32 v20, v26 +; CI-NEXT: s_waitcnt vmcnt(2) +; CI-NEXT: v_cvt_f16_f32_e32 v21, v27 +; CI-NEXT: s_waitcnt vmcnt(1) +; CI-NEXT: v_cvt_f16_f32_e32 v26, v28 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: v_cvt_f16_f32_e32 v27, v29 +; CI-NEXT: v_lshlrev_b32_e32 v20, 16, v20 +; CI-NEXT: v_or_b32_e32 v20, v21, v20 +; CI-NEXT: v_lshlrev_b32_e32 v21, 16, v26 +; CI-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:52 +; CI-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:48 +; CI-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:60 +; CI-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:56 +; CI-NEXT: v_or_b32_e32 v21, v27, v21 +; CI-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:132 +; CI-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:128 +; CI-NEXT: s_waitcnt vmcnt(5) +; CI-NEXT: v_cvt_f16_f32_e32 v24, v24 +; CI-NEXT: s_waitcnt vmcnt(4) +; CI-NEXT: v_cvt_f16_f32_e32 v25, v25 +; CI-NEXT: s_waitcnt vmcnt(3) +; CI-NEXT: v_cvt_f16_f32_e32 v23, v23 +; CI-NEXT: s_waitcnt vmcnt(2) +; CI-NEXT: v_cvt_f16_f32_e32 v22, v22 +; CI-NEXT: s_waitcnt vmcnt(1) +; CI-NEXT: v_cvt_f16_f32_e32 v26, v26 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: v_cvt_f16_f32_e32 v27, v27 +; CI-NEXT: v_lshlrev_b32_e32 v24, 16, v24 +; CI-NEXT: v_or_b32_e32 v24, v25, v24 +; CI-NEXT: v_lshlrev_b32_e32 v26, 16, v26 +; CI-NEXT: v_or_b32_e32 v26, v27, v26 +; CI-NEXT: v_add_i32_e32 v27, vcc, 0x7c, v0 +; CI-NEXT: buffer_store_dword v26, v27, s[0:3], 0 offen +; CI-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:124 +; CI-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:120 +; CI-NEXT: v_lshlrev_b32_e32 v23, 16, v23 +; CI-NEXT: v_or_b32_e32 v22, v22, v23 +; 
CI-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:88 +; CI-NEXT: s_waitcnt vmcnt(2) +; CI-NEXT: v_cvt_f16_f32_e32 v26, v26 +; CI-NEXT: s_waitcnt vmcnt(1) +; CI-NEXT: v_cvt_f16_f32_e32 v27, v27 +; CI-NEXT: v_lshlrev_b32_e32 v26, 16, v26 +; CI-NEXT: v_or_b32_e32 v26, v27, v26 +; CI-NEXT: v_add_i32_e32 v27, vcc, 0x78, v0 +; CI-NEXT: buffer_store_dword v26, v27, s[0:3], 0 offen +; CI-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:116 +; CI-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:112 +; CI-NEXT: s_waitcnt vmcnt(3) +; CI-NEXT: v_cvt_f16_f32_e32 v23, v23 +; CI-NEXT: s_waitcnt vmcnt(1) +; CI-NEXT: v_cvt_f16_f32_e32 v26, v26 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: v_cvt_f16_f32_e32 v27, v27 +; CI-NEXT: v_lshlrev_b32_e32 v26, 16, v26 +; CI-NEXT: v_or_b32_e32 v26, v27, v26 +; CI-NEXT: v_add_i32_e32 v27, vcc, 0x74, v0 +; CI-NEXT: buffer_store_dword v26, v27, s[0:3], 0 offen +; CI-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:108 +; CI-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:104 +; CI-NEXT: s_waitcnt vmcnt(1) +; CI-NEXT: v_cvt_f16_f32_e32 v25, v26 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: v_cvt_f16_f32_e32 v26, v27 +; CI-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:92 +; CI-NEXT: v_lshlrev_b32_e32 v25, 16, v25 +; CI-NEXT: v_or_b32_e32 v25, v26, v25 +; CI-NEXT: v_add_i32_e32 v26, vcc, 0x70, v0 +; CI-NEXT: buffer_store_dword v25, v26, s[0:3], 0 offen +; CI-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:100 +; CI-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:96 +; CI-NEXT: s_waitcnt vmcnt(3) +; CI-NEXT: v_cvt_f16_f32_e32 v27, v27 +; CI-NEXT: v_lshlrev_b32_e32 v27, 16, v27 +; CI-NEXT: v_or_b32_e32 v23, v23, v27 +; CI-NEXT: s_waitcnt vmcnt(1) +; CI-NEXT: v_cvt_f16_f32_e32 v25, v25 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: v_cvt_f16_f32_e32 v26, v26 +; CI-NEXT: v_add_i32_e32 v27, vcc, 0x68, v0 +; CI-NEXT: v_lshlrev_b32_e32 v25, 16, v25 +; CI-NEXT: v_or_b32_e32 v25, v26, v25 +; CI-NEXT: v_add_i32_e32 v26, vcc, 
0x6c, v0 +; CI-NEXT: buffer_store_dword v25, v26, s[0:3], 0 offen +; CI-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:68 +; CI-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:64 +; CI-NEXT: buffer_store_dword v23, v27, s[0:3], 0 offen +; CI-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:76 +; CI-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:72 +; CI-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:84 +; CI-NEXT: buffer_load_dword v29, off, s[0:3], s32 offset:80 +; CI-NEXT: s_waitcnt vmcnt(3) +; CI-NEXT: v_cvt_f16_f32_e32 v23, v23 +; CI-NEXT: v_cvt_f16_f32_e32 v25, v25 +; CI-NEXT: v_cvt_f16_f32_e32 v26, v26 +; CI-NEXT: v_lshlrev_b32_e32 v23, 16, v23 +; CI-NEXT: v_lshlrev_b32_e32 v25, 16, v25 +; CI-NEXT: v_or_b32_e32 v25, v26, v25 +; CI-NEXT: s_waitcnt vmcnt(2) +; CI-NEXT: v_cvt_f16_f32_e32 v26, v27 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: v_cvt_f16_f32_e32 v27, v29 +; CI-NEXT: v_or_b32_e32 v23, v26, v23 +; CI-NEXT: v_cvt_f16_f32_e32 v26, v28 +; CI-NEXT: v_lshlrev_b32_e32 v26, 16, v26 +; CI-NEXT: v_or_b32_e32 v26, v27, v26 +; CI-NEXT: v_add_i32_e32 v27, vcc, 0x64, v0 +; CI-NEXT: buffer_store_dword v26, v27, s[0:3], 0 offen +; CI-NEXT: v_add_i32_e32 v26, vcc, 0x60, v0 +; CI-NEXT: buffer_store_dword v23, v26, s[0:3], 0 offen +; CI-NEXT: v_add_i32_e32 v23, vcc, 0x5c, v0 +; CI-NEXT: buffer_store_dword v25, v23, s[0:3], 0 offen +; CI-NEXT: v_add_i32_e32 v23, vcc, 0x58, v0 +; CI-NEXT: buffer_store_dword v22, v23, s[0:3], 0 offen +; CI-NEXT: v_add_i32_e32 v22, vcc, 0x54, v0 +; CI-NEXT: buffer_store_dword v24, v22, s[0:3], 0 offen +; CI-NEXT: v_add_i32_e32 v22, vcc, 0x50, v0 +; CI-NEXT: buffer_store_dword v21, v22, s[0:3], 0 offen +; CI-NEXT: v_add_i32_e32 v21, vcc, 0x4c, v0 +; CI-NEXT: buffer_store_dword v20, v21, s[0:3], 0 offen +; CI-NEXT: v_add_i32_e32 v20, vcc, 0x48, v0 +; CI-NEXT: buffer_store_dword v19, v20, s[0:3], 0 offen +; CI-NEXT: v_add_i32_e32 v19, vcc, 0x44, v0 +; CI-NEXT: buffer_store_dword v18, v19, s[0:3], 0 offen +; CI-NEXT: 
v_add_i32_e32 v18, vcc, 64, v0 +; CI-NEXT: buffer_store_dword v17, v18, s[0:3], 0 offen +; CI-NEXT: v_add_i32_e32 v17, vcc, 60, v0 +; CI-NEXT: buffer_store_dword v16, v17, s[0:3], 0 offen +; CI-NEXT: v_add_i32_e32 v16, vcc, 56, v0 +; CI-NEXT: buffer_store_dword v15, v16, s[0:3], 0 offen +; CI-NEXT: v_add_i32_e32 v15, vcc, 52, v0 +; CI-NEXT: buffer_store_dword v14, v15, s[0:3], 0 offen +; CI-NEXT: v_add_i32_e32 v14, vcc, 48, v0 +; CI-NEXT: buffer_store_dword v13, v14, s[0:3], 0 offen +; CI-NEXT: v_add_i32_e32 v13, vcc, 44, v0 +; CI-NEXT: buffer_store_dword v12, v13, s[0:3], 0 offen +; CI-NEXT: v_add_i32_e32 v12, vcc, 40, v0 +; CI-NEXT: buffer_store_dword v11, v12, s[0:3], 0 offen +; CI-NEXT: v_add_i32_e32 v11, vcc, 36, v0 +; CI-NEXT: buffer_store_dword v10, v11, s[0:3], 0 offen +; CI-NEXT: v_add_i32_e32 v10, vcc, 32, v0 +; CI-NEXT: buffer_store_dword v9, v10, s[0:3], 0 offen +; CI-NEXT: v_add_i32_e32 v9, vcc, 28, v0 +; CI-NEXT: buffer_store_dword v8, v9, s[0:3], 0 offen +; CI-NEXT: v_add_i32_e32 v8, vcc, 24, v0 +; CI-NEXT: buffer_store_dword v7, v8, s[0:3], 0 offen +; CI-NEXT: v_add_i32_e32 v7, vcc, 20, v0 +; CI-NEXT: buffer_store_dword v6, v7, s[0:3], 0 offen +; CI-NEXT: v_add_i32_e32 v6, vcc, 16, v0 +; CI-NEXT: buffer_store_dword v5, v6, s[0:3], 0 offen +; CI-NEXT: v_add_i32_e32 v5, vcc, 12, v0 +; CI-NEXT: buffer_store_dword v4, v5, s[0:3], 0 offen +; CI-NEXT: v_add_i32_e32 v4, vcc, 8, v0 +; CI-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen +; CI-NEXT: v_add_i32_e32 v3, vcc, 4, v0 +; CI-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen +; CI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: s_setpc_b64 s[30:31] + %canonicalized = call <64 x half> @llvm.canonicalize.v64f16(<64 x half> %val) + ret <64 x half> %canonicalized +} + attributes #0 = { nounwind readnone } attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" } attributes #2 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" } 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll @@ -21,6 +21,21 @@ ret void } +define amdgpu_kernel void @set_inactive_imm_poison(ptr addrspace(1) %out) { +; GCN-LABEL: set_inactive_imm_poison: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: v_mov_b32_e32 v0, 1 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GCN-NEXT: s_endpgm + %tmp = call i32 @llvm.amdgcn.set.inactive.i32(i32 1, i32 poison) #0 + store i32 %tmp, ptr addrspace(1) %out + ret void +} + define amdgpu_kernel void @set_inactive_64(ptr addrspace(1) %out, i64 %in) { ; GCN-LABEL: set_inactive_64: ; GCN: ; %bb.0: @@ -43,6 +58,22 @@ ret void } +define amdgpu_kernel void @set_inactive_imm_poison_64(ptr addrspace(1) %out) { +; GCN-LABEL: set_inactive_imm_poison_64: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GCN-NEXT: v_mov_b32_e32 v0, 1 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: v_mov_b32_e32 v1, 0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GCN-NEXT: s_endpgm + %tmp = call i64 @llvm.amdgcn.set.inactive.i64(i64 1, i64 poison) #0 + store i64 %tmp, ptr addrspace(1) %out + ret void +} + define amdgpu_kernel void @set_inactive_scc(ptr addrspace(1) %out, i32 %in, <4 x i32> inreg %desc) { ; GCN-LABEL: set_inactive_scc: ; GCN: ; %bb.0: @@ -58,20 +89,20 @@ ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_cmp_lg_u32 s3, 56 ; GCN-NEXT: s_mov_b64 s[2:3], -1 -; GCN-NEXT: s_cbranch_scc1 .LBB2_3 +; GCN-NEXT: s_cbranch_scc1 .LBB4_3 ; GCN-NEXT: ; %bb.1: ; %Flow ; GCN-NEXT: s_andn2_b64 vcc, exec, s[2:3] -; GCN-NEXT: s_cbranch_vccz .LBB2_4 -; GCN-NEXT: .LBB2_2: ; %.exit +; GCN-NEXT: s_cbranch_vccz 
.LBB4_4 +; GCN-NEXT: .LBB4_2: ; %.exit ; GCN-NEXT: s_endpgm -; GCN-NEXT: .LBB2_3: ; %.one +; GCN-NEXT: .LBB4_3: ; %.one ; GCN-NEXT: v_add_u32_e32 v1, vcc, 1, v0 ; GCN-NEXT: s_mov_b32 s3, 0xf000 ; GCN-NEXT: s_mov_b32 s2, -1 ; GCN-NEXT: buffer_store_dword v1, off, s[0:3], 0 ; GCN-NEXT: s_mov_b64 s[2:3], 0 -; GCN-NEXT: s_cbranch_execnz .LBB2_2 -; GCN-NEXT: .LBB2_4: ; %.zero +; GCN-NEXT: s_cbranch_execnz .LBB4_2 +; GCN-NEXT: .LBB4_4: ; %.zero ; GCN-NEXT: s_mov_b32 s3, 0xf000 ; GCN-NEXT: s_mov_b32 s2, -1 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 diff --git a/llvm/test/CodeGen/PowerPC/vec-zext-abdu.ll b/llvm/test/CodeGen/PowerPC/vec-zext-abdu.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/vec-zext-abdu.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le -mcpu=pwr9 < %s | FileCheck %s + +define <12 x i8> @zext_abdu(<12 x i8> %a, <12 x i8> %b) { +; CHECK-LABEL: zext_abdu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha +; CHECK-NEXT: xxlxor 36, 36, 36 +; CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l +; CHECK-NEXT: lxv 37, 0(3) +; CHECK-NEXT: addis 3, 2, .LCPI0_1@toc@ha +; CHECK-NEXT: addi 3, 3, .LCPI0_1@toc@l +; CHECK-NEXT: lxv 33, 0(3) +; CHECK-NEXT: addis 3, 2, .LCPI0_2@toc@ha +; CHECK-NEXT: vperm 0, 4, 2, 5 +; CHECK-NEXT: vperm 5, 4, 3, 5 +; CHECK-NEXT: addi 3, 3, .LCPI0_2@toc@l +; CHECK-NEXT: lxv 39, 0(3) +; CHECK-NEXT: vperm 6, 4, 2, 1 +; CHECK-NEXT: vperm 1, 4, 3, 1 +; CHECK-NEXT: vperm 2, 4, 2, 7 +; CHECK-NEXT: vperm 3, 4, 3, 7 +; CHECK-NEXT: xvnegsp 36, 38 +; CHECK-NEXT: xvnegsp 35, 35 +; CHECK-NEXT: xvnegsp 34, 34 +; CHECK-NEXT: vabsduw 2, 2, 3 +; CHECK-NEXT: xvnegsp 35, 33 +; CHECK-NEXT: vabsduw 3, 4, 3 +; CHECK-NEXT: xvnegsp 36, 37 +; CHECK-NEXT: xvnegsp 37, 32 +; CHECK-NEXT: vpkuwum 2, 2, 2 +; CHECK-NEXT: vabsduw 4, 5, 4 +; CHECK-NEXT: vpkuwum 3, 4, 3 +; CHECK-NEXT: vpkuhum 2, 2, 3 +; 
CHECK-NEXT: blr +entry: + %aa = zext <12 x i8> %a to <12 x i32> + %bb = zext <12 x i8> %b to <12 x i32> + %s = sub nsw <12 x i32> %aa, %bb + %c = icmp slt <12 x i32> %s, zeroinitializer + %ss = sub nsw <12 x i32> zeroinitializer, %s + %sel = select <12 x i1> %c, <12 x i32> %ss, <12 x i32> %s + %ret = trunc <12 x i32> %sel to <12 x i8> + ret <12 x i8> %ret +} diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -75,8 +75,8 @@ ; RUN: llc -mtriple=riscv32 -mattr=+experimental-smaia %s -o - | FileCheck --check-prefixes=CHECK,RV32SMAIA %s ; RUN: llc -mtriple=riscv32 -mattr=+experimental-ssaia %s -o - | FileCheck --check-prefixes=CHECK,RV32SSAIA %s ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zfbfmin %s -o - | FileCheck --check-prefixes=CHECK,RV32ZFBFMIN %s -; RUN: llc -mtriple=riscv32 -mattr=+f,+experimental-zvfbfmin %s -o - | FileCheck --check-prefixes=CHECK,RV32ZVFBFMIN %s -; RUN: llc -mtriple=riscv32 -mattr=+f,+experimental-zvfbfwma %s -o - | FileCheck --check-prefixes=CHECK,RV32ZVFBFWMA %s +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zvfbfmin %s -o - | FileCheck --check-prefixes=CHECK,RV32ZVFBFMIN %s +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zvfbfwma %s -o - | FileCheck --check-prefixes=CHECK,RV32ZVFBFWMA %s ; RUN: llc -mtriple=riscv64 %s -o - | FileCheck %s ; RUN: llc -mtriple=riscv64 -mattr=+m %s -o - | FileCheck --check-prefixes=CHECK,RV64M %s @@ -159,8 +159,8 @@ ; RUN: llc -mtriple=riscv64 -mattr=+experimental-smaia %s -o - | FileCheck --check-prefixes=CHECK,RV64SMAIA %s ; RUN: llc -mtriple=riscv64 -mattr=+experimental-ssaia %s -o - | FileCheck --check-prefixes=CHECK,RV64SSAIA %s ; RUN: llc -mtriple=riscv64 -mattr=+experimental-zfbfmin %s -o - | FileCheck --check-prefixes=CHECK,RV64ZFBFMIN %s -; RUN: llc -mtriple=riscv64 -mattr=+f,+experimental-zvfbfmin %s -o - | FileCheck --check-prefixes=CHECK,RV64ZVFBFMIN %s -; RUN: 
llc -mtriple=riscv64 -mattr=+f,+experimental-zvfbfwma %s -o - | FileCheck --check-prefixes=CHECK,RV64ZVFBFWMA %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zvfbfmin %s -o - | FileCheck --check-prefixes=CHECK,RV64ZVFBFMIN %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zvfbfwma %s -o - | FileCheck --check-prefixes=CHECK,RV64ZVFBFWMA %s ; CHECK: .attribute 4, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV32 %s -; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV64 %s +; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh,+experimental-zvfh -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV32 %s +; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+experimental-zvfh -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV64 %s define <5 x i8> @load_v5i8(ptr %p) { ; RV32-LABEL: load_v5i8: @@ -123,29 +123,11 @@ } define <6 x half> @load_v6f16(ptr %p) { -; RV32-LABEL: load_v6f16: -; RV32: # %bb.0: -; RV32-NEXT: lw a2, 8(a1) -; RV32-NEXT: lw a3, 4(a1) -; RV32-NEXT: lw a1, 0(a1) -; RV32-NEXT: sw a2, 8(a0) -; RV32-NEXT: sw a3, 4(a0) -; RV32-NEXT: sw a1, 0(a0) -; RV32-NEXT: ret -; -; RV64-LABEL: load_v6f16: -; RV64: # %bb.0: -; RV64-NEXT: ld a2, 0(a1) -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64-NEXT: ld a1, 8(a1) -; RV64-NEXT: vslide1down.vx v8, v8, a2 -; RV64-NEXT: vslide1down.vx v8, v8, a1 -; RV64-NEXT: sd a2, 0(a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: load_v6f16: 
+; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: ret %x = load <6 x half>, ptr %p ret <6 x half> %x } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV32 %s -; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV64 %s +; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh,+experimental-zvfh -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV32 %s +; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+experimental-zvfh -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV64 %s define void @store_v5i8(ptr %p, <5 x i8> %v) { ; CHECK-LABEL: store_v5i8: @@ -103,56 +103,16 @@ define void @store_v6f16(ptr %p, <6 x half> %v) { ; RV32-LABEL: store_v6f16: ; RV32: # %bb.0: -; RV32-NEXT: lh a2, 20(a1) -; RV32-NEXT: lhu a3, 16(a1) -; RV32-NEXT: slli a2, a2, 16 -; RV32-NEXT: or a2, a3, a2 -; RV32-NEXT: lh a3, 12(a1) -; RV32-NEXT: lhu a4, 8(a1) -; RV32-NEXT: lh a5, 4(a1) -; RV32-NEXT: lhu a1, 0(a1) -; RV32-NEXT: slli a3, a3, 16 -; RV32-NEXT: or a3, a4, a3 -; RV32-NEXT: slli a5, a5, 16 -; RV32-NEXT: or a1, a1, a5 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vslide1down.vx v8, v8, a1 -; RV32-NEXT: vslide1down.vx v8, v8, a3 -; RV32-NEXT: vslide1down.vx v8, v8, a2 -; RV32-NEXT: vslidedown.vi v8, v8, 1 ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vse32.v v8, (a0) ; RV32-NEXT: vslidedown.vi v9, v8, 2 ; RV32-NEXT: addi a1, a0, 8 ; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vslidedown.vi v8, v8, 1 -; RV32-NEXT: addi a0, a0, 4 -; RV32-NEXT: vse32.v v8, 
(a0) +; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV32-NEXT: vse16.v v8, (a0) ; RV32-NEXT: ret ; ; RV64-LABEL: store_v6f16: ; RV64: # %bb.0: -; RV64-NEXT: lhu a2, 16(a1) -; RV64-NEXT: lh a3, 24(a1) -; RV64-NEXT: slli a2, a2, 32 -; RV64-NEXT: lh a4, 8(a1) -; RV64-NEXT: lhu a5, 0(a1) -; RV64-NEXT: slli a3, a3, 48 -; RV64-NEXT: or a2, a3, a2 -; RV64-NEXT: slli a4, a4, 16 -; RV64-NEXT: or a4, a5, a4 -; RV64-NEXT: slli a4, a4, 32 -; RV64-NEXT: lh a3, 40(a1) -; RV64-NEXT: lhu a1, 32(a1) -; RV64-NEXT: srli a4, a4, 32 -; RV64-NEXT: or a2, a4, a2 -; RV64-NEXT: slli a3, a3, 16 -; RV64-NEXT: or a1, a1, a3 -; RV64-NEXT: slli a1, a1, 32 -; RV64-NEXT: srli a1, a1, 32 -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64-NEXT: vslide1down.vx v8, v8, a2 -; RV64-NEXT: vslide1down.vx v8, v8, a1 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vse64.v v8, (a0) ; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma diff --git a/llvm/test/CodeGen/X86/prefetchi.ll b/llvm/test/CodeGen/X86/prefetchi.ll --- a/llvm/test/CodeGen/X86/prefetchi.ll +++ b/llvm/test/CodeGen/X86/prefetchi.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-- -mattr=+prefetchi | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefix=NOPREFETCHI define dso_local void @t(ptr %ptr) nounwind { ; CHECK-LABEL: t: @@ -9,6 +10,10 @@ ; CHECK-NEXT: prefetchit1 t(%rip) ; CHECK-NEXT: prefetchit0 ext(%rip) ; CHECK-NEXT: retq +; +; NOPREFETCHI-LABEL: t: +; NOPREFETCHI: # %bb.0: # %entry +; NOPREFETCHI-NEXT: retq entry: tail call void @llvm.prefetch(ptr %ptr, i32 0, i32 2, i32 0) tail call void @llvm.prefetch(ptr %ptr, i32 0, i32 3, i32 0) diff --git a/llvm/test/Examples/OrcV2Examples/lljit-with-remote-debugging.test b/llvm/test/Examples/OrcV2Examples/lljit-with-remote-debugging.test --- a/llvm/test/Examples/OrcV2Examples/lljit-with-remote-debugging.test +++ 
b/llvm/test/Examples/OrcV2Examples/lljit-with-remote-debugging.test @@ -2,6 +2,7 @@ # Instructions for debugging can be found in LLJITWithRemoteDebugging.cpp # REQUIRES: default_triple +# UNSUPPORTED: target=powerpc64{{.*}} # RUN: LLJITWithRemoteDebugging %p/Inputs/argc_sub1_elf.ll | FileCheck --check-prefix=CHECK0 %s # CHECK0: Parsing input IR code from: {{.*}}/Inputs/argc_sub1_elf.ll diff --git a/llvm/test/Examples/OrcV2Examples/lljit-with-thinlto-summaries.test b/llvm/test/Examples/OrcV2Examples/lljit-with-thinlto-summaries.test --- a/llvm/test/Examples/OrcV2Examples/lljit-with-thinlto-summaries.test +++ b/llvm/test/Examples/OrcV2Examples/lljit-with-thinlto-summaries.test @@ -3,6 +3,7 @@ # RUN: opt -module-summary %p/Inputs/bar-mod.ll -o %T/bar-mod.bc # REQUIRES: default_triple +# UNSUPPORTED: target=powerpc64{{.*}} # RUN: llvm-lto -thinlto -o %T/main-foo-bar %T/main-mod.bc %T/foo-mod.bc %T/bar-mod.bc diff --git a/llvm/test/Examples/OrcV2Examples/orcv2-cbindings-lazy.test b/llvm/test/Examples/OrcV2Examples/orcv2-cbindings-lazy.test --- a/llvm/test/Examples/OrcV2Examples/orcv2-cbindings-lazy.test +++ b/llvm/test/Examples/OrcV2Examples/orcv2-cbindings-lazy.test @@ -1,5 +1,7 @@ # RUN: OrcV2CBindingsLazy 2>&1 | FileCheck -check-prefix=THIS %s # RUN: OrcV2CBindingsLazy 0 2>&1 | FileCheck -check-prefix=OTHER %s +# UNSUPPORTED: target=powerpc64{{.*}} + # THIS: entry(1) = 1 # OTHER: entry(2) = 2 diff --git a/llvm/test/ExecutionEngine/JITLink/RISCV/ELF_riscv64_got_plt_reloc.s b/llvm/test/ExecutionEngine/JITLink/RISCV/ELF_riscv64_got_plt_reloc.s --- a/llvm/test/ExecutionEngine/JITLink/RISCV/ELF_riscv64_got_plt_reloc.s +++ b/llvm/test/ExecutionEngine/JITLink/RISCV/ELF_riscv64_got_plt_reloc.s @@ -6,6 +6,13 @@ # RUN: -abs external_func=0x1 -abs external_data=0x2 \ # RUN: -check %s %t/elf_riscv64_got_plt_reloc.o +## Run the same tests with relaxation enabled. 
+# RUN: llvm-mc -triple=riscv64 -position-independent -filetype=obj \ +# RUN: -mattr=+relax -o %t/elf_riscv64_got_plt_reloc.o %s +# RUN: llvm-jitlink -noexec \ +# RUN: -slab-allocate 100Kb -slab-address 0xfff00000 -slab-page-size 4096 \ +# RUN: -abs external_func=0x1 -abs external_data=0x2 \ +# RUN: -check %s %t/elf_riscv64_got_plt_reloc.o .text .file "testcase.c" diff --git a/llvm/test/ExecutionEngine/JITLink/x86-64/MachO-duplicate-local.test b/llvm/test/ExecutionEngine/JITLink/x86-64/MachO-duplicate-local.test --- a/llvm/test/ExecutionEngine/JITLink/x86-64/MachO-duplicate-local.test +++ b/llvm/test/ExecutionEngine/JITLink/x86-64/MachO-duplicate-local.test @@ -12,6 +12,7 @@ # CHECK-NEXT: target = _foo --- !mach-o +IsLittleEndian: true FileHeader: magic: 0xFEEDFACF cputype: 0x1000007 diff --git a/llvm/test/ExecutionEngine/JITLink/x86-64/MachO_ehframe_bad_fde_cie-ptr_out-of-range.test b/llvm/test/ExecutionEngine/JITLink/x86-64/MachO_ehframe_bad_fde_cie-ptr_out-of-range.test --- a/llvm/test/ExecutionEngine/JITLink/x86-64/MachO_ehframe_bad_fde_cie-ptr_out-of-range.test +++ b/llvm/test/ExecutionEngine/JITLink/x86-64/MachO_ehframe_bad_fde_cie-ptr_out-of-range.test @@ -6,6 +6,7 @@ # CHECK: llvm-jitlink error: No CIE found at address --- !mach-o +IsLittleEndian: true FileHeader: magic: 0xFEEDFACF cputype: 0x1000007 diff --git a/llvm/test/ExecutionEngine/JITLink/x86-64/MachO_ehframe_bad_fde_pc-begin_out-of-range.test b/llvm/test/ExecutionEngine/JITLink/x86-64/MachO_ehframe_bad_fde_pc-begin_out-of-range.test --- a/llvm/test/ExecutionEngine/JITLink/x86-64/MachO_ehframe_bad_fde_pc-begin_out-of-range.test +++ b/llvm/test/ExecutionEngine/JITLink/x86-64/MachO_ehframe_bad_fde_pc-begin_out-of-range.test @@ -6,6 +6,7 @@ # CHECK: llvm-jitlink error: No symbol or block covering address --- !mach-o +IsLittleEndian: true FileHeader: magic: 0xFEEDFACF cputype: 0x1000007 diff --git a/llvm/test/ExecutionEngine/lit.local.cfg b/llvm/test/ExecutionEngine/lit.local.cfg --- 
a/llvm/test/ExecutionEngine/lit.local.cfg +++ b/llvm/test/ExecutionEngine/lit.local.cfg @@ -1,4 +1,4 @@ -if config.root.native_target in ["Sparc", "PowerPC", "SystemZ", "Hexagon", "RISCV"]: +if config.root.native_target in ['Sparc', 'SystemZ', 'Hexagon', 'RISCV']: config.unsupported = True # ExecutionEngine tests are not expected to pass in a cross-compilation setup. diff --git a/llvm/test/MC/RISCV/attribute-arch.s b/llvm/test/MC/RISCV/attribute-arch.s --- a/llvm/test/MC/RISCV/attribute-arch.s +++ b/llvm/test/MC/RISCV/attribute-arch.s @@ -267,8 +267,8 @@ .attribute arch, "rv32if_zfbfmin0p6" # CHECK: .attribute 5, "rv32i2p1_f2p2_zicsr2p0_zfbfmin0p6" -.attribute arch, "rv32if_zvfbfmin0p6" +.attribute arch, "rv32i_zvfbfmin0p6" # CHECK: .attribute 5, "rv32i2p1_f2p2_zicsr2p0_zve32f1p0_zve32x1p0_zvfbfmin0p6_zvl32b1p0" -.attribute arch, "rv32if_zvfbfwma0p6" +.attribute arch, "rv32i_zvfbfwma0p6" # CHECK: .attribute 5, "rv32i2p1_f2p2_zicsr2p0_zve32f1p0_zve32x1p0_zvfbfwma0p6_zvl32b1p0" diff --git a/llvm/test/MC/RISCV/rv32zfbfmin-valid.s b/llvm/test/MC/RISCV/rv32zfbfmin-valid.s --- a/llvm/test/MC/RISCV/rv32zfbfmin-valid.s +++ b/llvm/test/MC/RISCV/rv32zfbfmin-valid.s @@ -1,12 +1,12 @@ -# RUN: llvm-mc %s -triple=riscv32 -mattr=+experimental-zfbfmin,+f -riscv-no-aliases -show-encoding \ +# RUN: llvm-mc %s -triple=riscv32 -mattr=+experimental-zfbfmin -riscv-no-aliases -show-encoding \ # RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc %s -triple=riscv64 -mattr=+experimental-zfbfmin,+f -riscv-no-aliases -show-encoding \ +# RUN: llvm-mc %s -triple=riscv64 -mattr=+experimental-zfbfmin -riscv-no-aliases -show-encoding \ # RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+experimental-zfbfmin,+d < %s \ -# RUN: | llvm-objdump --mattr=+experimental-zfbfmin,+f -M no-aliases -d -r - \ +# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+experimental-zfbfmin,+f < %s \ +# RUN: | llvm-objdump 
--mattr=+experimental-zfbfmin -M no-aliases -d -r - \ # RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+experimental-zfbfmin,+d < %s \ -# RUN: | llvm-objdump --mattr=+experimental-zfbfmin,+f -M no-aliases -d -r - \ +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+experimental-zfbfmin,+f < %s \ +# RUN: | llvm-objdump --mattr=+experimental-zfbfmin -M no-aliases -d -r - \ # RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s # CHECK-ASM-AND-OBJ: flh ft0, 12(a0) diff --git a/llvm/test/MC/RISCV/rvv/zvfbfmin.s b/llvm/test/MC/RISCV/rvv/zvfbfmin.s --- a/llvm/test/MC/RISCV/rvv/zvfbfmin.s +++ b/llvm/test/MC/RISCV/rvv/zvfbfmin.s @@ -1,20 +1,20 @@ -# RUN: llvm-mc -triple=riscv32 -show-encoding -mattr=+f,+experimental-zvfbfmin %s \ +# RUN: llvm-mc -triple=riscv32 -show-encoding -mattr=+experimental-zvfbfmin %s \ # RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST # RUN: not llvm-mc -triple=riscv32 -show-encoding -mattr=+v,+f %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR -# RUN: llvm-mc -triple=riscv32 -filetype=obj -mattr=+f,+experimental-zvfbfmin %s \ -# RUN: | llvm-objdump -d --mattr=+f,+experimental-zvfbfmin - \ +# RUN: llvm-mc -triple=riscv32 -filetype=obj -mattr=+experimental-zvfbfmin %s \ +# RUN: | llvm-objdump -d --mattr=+experimental-zvfbfmin - \ # RUN: | FileCheck %s --check-prefix=CHECK-INST -# RUN: llvm-mc -triple=riscv32 -filetype=obj -mattr=+f,+experimental-zvfbfmin %s \ +# RUN: llvm-mc -triple=riscv32 -filetype=obj -mattr=+experimental-zvfbfmin %s \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN -# RUN: llvm-mc -triple=riscv64 -show-encoding -mattr=+f,+experimental-zvfbfmin %s \ +# RUN: llvm-mc -triple=riscv64 -show-encoding -mattr=+experimental-zvfbfmin %s \ # RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST # RUN: not llvm-mc -triple=riscv64 -show-encoding -mattr=+v,+f %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR -# RUN: 
llvm-mc -triple=riscv64 -filetype=obj -mattr=+f,+experimental-zvfbfmin %s \ -# RUN: | llvm-objdump -d --mattr=+f,+experimental-zvfbfmin - \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj -mattr=+experimental-zvfbfmin %s \ +# RUN: | llvm-objdump -d --mattr=+experimental-zvfbfmin - \ # RUN: | FileCheck %s --check-prefix=CHECK-INST -# RUN: llvm-mc -triple=riscv64 -filetype=obj -mattr=+f,+experimental-zvfbfmin %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj -mattr=+experimental-zvfbfmin %s \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN # CHECK-INST: vfncvtbf16.f.f.w v8, v4, v0.t diff --git a/llvm/test/MC/RISCV/rvv/zvfbfwma.s b/llvm/test/MC/RISCV/rvv/zvfbfwma.s --- a/llvm/test/MC/RISCV/rvv/zvfbfwma.s +++ b/llvm/test/MC/RISCV/rvv/zvfbfwma.s @@ -1,20 +1,20 @@ -# RUN: llvm-mc -triple=riscv32 -show-encoding -mattr=+f,+experimental-zvfbfwma %s \ +# RUN: llvm-mc -triple=riscv32 -show-encoding -mattr=+experimental-zvfbfwma %s \ # RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST # RUN: not llvm-mc -triple=riscv32 -show-encoding -mattr=+v,+f %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR -# RUN: llvm-mc -triple=riscv32 -filetype=obj -mattr=+f,+experimental-zvfbfwma %s \ -# RUN: | llvm-objdump -d --mattr=+f,+experimental-zvfbfwma - \ +# RUN: llvm-mc -triple=riscv32 -filetype=obj -mattr=+experimental-zvfbfwma %s \ +# RUN: | llvm-objdump -d --mattr=+experimental-zvfbfwma - \ # RUN: | FileCheck %s --check-prefix=CHECK-INST -# RUN: llvm-mc -triple=riscv32 -filetype=obj -mattr=+f,+experimental-zvfbfwma %s \ +# RUN: llvm-mc -triple=riscv32 -filetype=obj -mattr=+experimental-zvfbfwma %s \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN -# RUN: llvm-mc -triple=riscv64 -show-encoding -mattr=+f,+experimental-zvfbfwma %s \ +# RUN: llvm-mc -triple=riscv64 -show-encoding -mattr=+experimental-zvfbfwma %s \ # RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST # RUN: not llvm-mc -triple=riscv64 
-show-encoding -mattr=+v,+f %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR -# RUN: llvm-mc -triple=riscv64 -filetype=obj -mattr=+f,+experimental-zvfbfwma %s \ -# RUN: | llvm-objdump -d --mattr=+f,+experimental-zvfbfwma - \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj -mattr=+experimental-zvfbfwma %s \ +# RUN: | llvm-objdump -d --mattr=+experimental-zvfbfwma - \ # RUN: | FileCheck %s --check-prefix=CHECK-INST -# RUN: llvm-mc -triple=riscv64 -filetype=obj -mattr=+f,+experimental-zvfbfwma %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj -mattr=+experimental-zvfbfwma %s \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN # CHECK-INST: vfwmaccbf16.vv v8, v20, v4, v0.t diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -9,83 +9,83 @@ ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes='default' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O1,%llvmcheckext +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O1,%llvmcheckext ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes='default' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O2,CHECK-O23SZ,%llvmcheckext +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O2,CHECK-O23SZ,%llvmcheckext ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes='default' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,CHECK-O23SZ,%llvmcheckext +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,CHECK-O23SZ,%llvmcheckext ; RUN: opt -disable-verify 
-verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes='default' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-NO-FUNC-SPEC,CHECK-Os,CHECK-O23SZ,%llvmcheckext +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-Os,CHECK-O23SZ,%llvmcheckext ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes='default' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-NO-FUNC-SPEC,CHECK-Oz,CHECK-O23SZ,%llvmcheckext +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-Oz,CHECK-O23SZ,%llvmcheckext ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes='lto-pre-link' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-LTO,CHECK-NO-FUNC-SPEC,CHECK-O2,CHECK-O23SZ,%llvmcheckext +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-LTO,CHECK-O2,CHECK-O23SZ,%llvmcheckext ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes-ep-peephole='no-op-function' \ ; RUN: -passes='default' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,%llvmcheckext,CHECK-EP-PEEPHOLE,CHECK-O23SZ +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,%llvmcheckext,CHECK-EP-PEEPHOLE,CHECK-O23SZ ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes-ep-late-loop-optimizations='no-op-loop' \ ; RUN: -passes='default' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,%llvmcheckext,CHECK-EP-LOOP-LATE,CHECK-O23SZ +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,%llvmcheckext,CHECK-EP-LOOP-LATE,CHECK-O23SZ ; RUN: opt -disable-verify 
-verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes-ep-loop-optimizer-end='no-op-loop' \ ; RUN: -passes='default' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,%llvmcheckext,CHECK-EP-LOOP-END,CHECK-O23SZ +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,%llvmcheckext,CHECK-EP-LOOP-END,CHECK-O23SZ ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes-ep-scalar-optimizer-late='no-op-function' \ ; RUN: -passes='default' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,%llvmcheckext,CHECK-EP-SCALAR-LATE,CHECK-O23SZ +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,%llvmcheckext,CHECK-EP-SCALAR-LATE,CHECK-O23SZ ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes-ep-cgscc-optimizer-late='no-op-cgscc' \ ; RUN: -passes='default' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,%llvmcheckext,CHECK-EP-CGSCC-LATE,CHECK-O23SZ +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,%llvmcheckext,CHECK-EP-CGSCC-LATE,CHECK-O23SZ ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes-ep-vectorizer-start='no-op-function' \ ; RUN: -passes='default' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,%llvmcheckext,CHECK-EP-VECTORIZER-START,CHECK-O23SZ +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,%llvmcheckext,CHECK-EP-VECTORIZER-START,CHECK-O23SZ ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes-ep-pipeline-start='no-op-module' \ ; RUN: -passes='default' -S %s 
2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,%llvmcheckext,CHECK-EP-PIPELINE-START,CHECK-O23SZ +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,%llvmcheckext,CHECK-EP-PIPELINE-START,CHECK-O23SZ ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes-ep-pipeline-early-simplification='no-op-module' \ ; RUN: -passes='default' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,%llvmcheckext,CHECK-EP-PIPELINE-EARLY-SIMPLIFICATION,CHECK-O23SZ +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,%llvmcheckext,CHECK-EP-PIPELINE-EARLY-SIMPLIFICATION,CHECK-O23SZ ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes-ep-pipeline-start='no-op-module' \ ; RUN: -passes='lto-pre-link' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-LTO,CHECK-NO-FUNC-SPEC,CHECK-O3,%llvmcheckext,CHECK-EP-PIPELINE-START,CHECK-O23SZ +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-LTO,CHECK-O3,%llvmcheckext,CHECK-EP-PIPELINE-START,CHECK-O23SZ ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes-ep-optimizer-early='no-op-module' \ ; RUN: -passes='default' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,%llvmcheckext,CHECK-EP-OPTIMIZER-EARLY,CHECK-O23SZ +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,%llvmcheckext,CHECK-EP-OPTIMIZER-EARLY,CHECK-O23SZ ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes-ep-optimizer-last='no-op-module' \ ; RUN: -passes='default' -S %s 2>&1 \ -; RUN: | FileCheck %s 
--check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,%llvmcheckext,CHECK-EP-OPTIMIZER-LAST,CHECK-O23SZ +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,%llvmcheckext,CHECK-EP-OPTIMIZER-LAST,CHECK-O23SZ ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes='default' -enable-matrix -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,CHECK-O23SZ,%llvmcheckext,CHECK-MATRIX +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,CHECK-O23SZ,%llvmcheckext,CHECK-MATRIX ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes='default' -enable-merge-functions -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,CHECK-O23SZ,%llvmcheckext,CHECK-MERGE-FUNCS +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,CHECK-O23SZ,%llvmcheckext,CHECK-MERGE-FUNCS ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes='default' -ir-outliner -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,CHECK-O23SZ,%llvmcheckext,CHECK-IR-OUTLINER +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,CHECK-O23SZ,%llvmcheckext,CHECK-IR-OUTLINER ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes='default' -hot-cold-split -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-FUNC-SPEC,CHECK-O3,CHECK-O23SZ,%llvmcheckext,CHECK-HOT-COLD-SPLIT +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-DEFAULT,CHECK-O3,CHECK-O23SZ,%llvmcheckext,CHECK-HOT-COLD-SPLIT ; Suppress FileCheck --allow-unused-prefixes=false diagnostics. 
; CHECK-Oz: {{^}} @@ -109,7 +109,6 @@ ; CHECK-O-NEXT: Running pass: OpenMPOptPass ; CHECK-EP-PIPELINE-EARLY-SIMPLIFICATION-NEXT: Running pass: NoOpModulePass ; CHECK-O-NEXT: Running pass: IPSCCPPass -; CHECK-FUNC-SPEC-NEXT: Running analysis: LoopAnalysis ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running pass: GlobalOptPass ; CHECK-O-NEXT: Running pass: PromotePass @@ -164,7 +163,7 @@ ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: ReassociatePass ; CHECK-O-NEXT: Running pass: LoopSimplifyPass -; CHECK-NO-FUNC-SPEC-NEXT: Running analysis: LoopAnalysis +; CHECK-O-NEXT: Running analysis: LoopAnalysis ; CHECK-O-NEXT: Running pass: LCSSAPass ; CHECK-O-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll --- a/llvm/test/Other/new-pm-lto-defaults.ll +++ b/llvm/test/Other/new-pm-lto-defaults.ll @@ -9,23 +9,23 @@ ; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O1,CHECK-EP ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes='lto' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23,CHECK-O23SZ +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes='lto' -S %s -passes-ep-full-link-time-optimization-early=no-op-module \ ; RUN: -passes-ep-full-link-time-optimization-last=no-op-module 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23,CHECK-O23SZ,CHECK-EP +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,CHECK-EP ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes='lto' -S %s 2>&1 \ -; RUN: | FileCheck %s 
--check-prefixes=CHECK-O,CHECK-O3,CHECK-O23,CHECK-O23SZ +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes='lto' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-OS,CHECK-OSZ,CHECK-O23SZ +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-OS,CHECK-O23SZ ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes='lto' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-OSZ,CHECK-O23SZ +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O23SZ ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes='lto' -S %s -passes-ep-peephole='no-op-function' 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23,CHECK-O23SZ,CHECK-EP-Peephole +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,CHECK-EP-Peephole ; CHECK-EP: Running pass: NoOpModulePass ; CHECK-O: Running pass: CrossDSOCFIPass @@ -43,7 +43,6 @@ ; CHECK-O23SZ-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis ; CHECK-O23SZ-NEXT: Running pass: IPSCCPPass ; CHECK-O23SZ-NEXT: Running analysis: AssumptionAnalysis on foo -; CHECK-O23-NEXT: Running analysis: LoopAnalysis on foo ; CHECK-O23SZ-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}SCC ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis @@ -94,7 +93,7 @@ ; CHECK-O23SZ-NEXT: Invalidating analysis: AAManager on foo ; CHECK-O23SZ-NEXT: Running pass: OpenMPOptCGSCCPass on (foo) ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass on foo -; CHECK-OSZ-NEXT: Running analysis: LoopAnalysis on foo +; CHECK-O23SZ-NEXT: Running analysis: LoopAnalysis on foo ; CHECK-O23SZ-NEXT: Running pass: LCSSAPass on foo ; CHECK-O23SZ-NEXT: Running analysis: 
MemorySSAAnalysis on foo ; CHECK-O23SZ-NEXT: Running analysis: AAManager on foo diff --git a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll @@ -10,28 +10,28 @@ ; Postlink pipelines: ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes='thinlto' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-FUNC-SPEC,CHECK-O1,CHECK-POSTLINK-O,%llvmcheckext +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O1,CHECK-POSTLINK-O,%llvmcheckext ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes='thinlto' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-FUNC-SPEC,CHECK-O2,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-O2 +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-O2 ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager -passes-ep-pipeline-start='no-op-module' \ ; RUN: -passes='thinlto' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-FUNC-SPEC,CHECK-O3,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-O3 +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-O3 ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager -passes-ep-optimizer-early='no-op-module' \ ; RUN: -passes='thinlto' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-FUNC-SPEC,CHECK-O3,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-O3,CHECK-POST-EP-OPT-EARLY +; RUN: | FileCheck %s 
--check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-O3,CHECK-POST-EP-OPT-EARLY ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager -passes-ep-optimizer-last='no-op-module' \ ; RUN: -passes='thinlto' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-FUNC-SPEC,CHECK-O3,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-O3,CHECK-POST-EP-OPT-LAST +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-O3,CHECK-POST-EP-OPT-LAST ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes='thinlto' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-NO-FUNC-SPEC,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-Os +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-Os ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes='thinlto' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-NO-FUNC-SPEC,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager -debug-info-for-profiling \ ; RUN: -passes='thinlto' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-FUNC-SPEC,CHECK-O2,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-O2 +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-O2 ; Suppress FileCheck --allow-unused-prefixes=false diagnostics. 
; CHECK-NOEXT: {{^}} @@ -58,7 +58,6 @@ ; CHECK-O-NEXT: Running pass: OpenMPOptPass ; CHECK-POSTLINK-O-NEXT: Running pass: LowerTypeTestsPass ; CHECK-O-NEXT: Running pass: IPSCCPPass -; CHECK-FUNC-SPEC-NEXT: Running analysis: LoopAnalysis ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running pass: GlobalOptPass ; CHECK-O-NEXT: Running pass: PromotePass @@ -108,7 +107,7 @@ ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: ReassociatePass ; CHECK-O-NEXT: Running pass: LoopSimplifyPass -; CHECK-NO-FUNC-SPEC-NEXT: Running analysis: LoopAnalysis +; CHECK-O-NEXT: Running analysis: LoopAnalysis ; CHECK-O-NEXT: Running pass: LCSSAPass ; CHECK-O-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -3,22 +3,22 @@ ; Postlink pipelines: ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes='thinlto' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O123,CHECK-O1,%llvmcheckext +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O1,%llvmcheckext ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes='thinlto' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O123,CHECK-O2,CHECK-O23SZ,%llvmcheckext +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,%llvmcheckext ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager -passes-ep-pipeline-start='no-op-module' \ ; RUN: -passes='thinlto' -S %s 2>&1 \ -; RUN: | FileCheck %s 
--check-prefixes=CHECK-O,CHECK-O123,CHECK-O3,CHECK-O23SZ,%llvmcheckext +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,%llvmcheckext ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes='thinlto' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-OSZ,CHECK-Os,CHECK-O23SZ,%llvmcheckext +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Os,CHECK-O23SZ,%llvmcheckext ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes='thinlto' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-OSZ,CHECK-O23SZ,%llvmcheckext +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O23SZ,%llvmcheckext ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager -debug-info-for-profiling \ ; RUN: -passes='thinlto' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O123,CHECK-O2,CHECK-O23SZ,%llvmcheckext +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,%llvmcheckext ; Suppress FileCheck --allow-unused-prefixes=false diagnostics. 
; CHECK-NOEXT: {{^}} @@ -43,7 +43,6 @@ ; CHECK-O-NEXT: Running pass: OpenMPOptPass ; CHECK-O-NEXT: Running pass: LowerTypeTestsPass ; CHECK-O-NEXT: Running pass: IPSCCPPass -; CHECK-O123-NEXT: Running analysis: LoopAnalysis on foo ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running pass: GlobalOptPass ; CHECK-O-NEXT: Running pass: PromotePass @@ -55,7 +54,7 @@ ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: BlockFrequencyAnalysis on foo ; CHECK-O-NEXT: Running analysis: BranchProbabilityAnalysis on foo -; CHECK-OSZ-NEXT: Running analysis: LoopAnalysis on foo +; CHECK-O-NEXT: Running analysis: LoopAnalysis on foo ; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis on foo ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -3,27 +3,27 @@ ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -pgo-kind=pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-thinlto-samplepgo-defaults.prof' \ ; RUN: -passes='thinlto' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O123,CHECK-O1,%llvmcheckext +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O1,%llvmcheckext ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -pgo-kind=pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-thinlto-samplepgo-defaults.prof' \ ; RUN: -passes='thinlto' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O123,CHECK-O2,CHECK-O23SZ,%llvmcheckext +; RUN: | FileCheck %s 
--check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,%llvmcheckext ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager -passes-ep-pipeline-start='no-op-module' \ ; RUN: -pgo-kind=pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-thinlto-samplepgo-defaults.prof' \ ; RUN: -passes='thinlto' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O123,CHECK-O3,CHECK-O23SZ,%llvmcheckext +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,%llvmcheckext ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -pgo-kind=pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-thinlto-samplepgo-defaults.prof' \ ; RUN: -passes='thinlto' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-OSZ,CHECK-Os,CHECK-O23SZ,%llvmcheckext +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Os,CHECK-O23SZ,%llvmcheckext ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -pgo-kind=pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-thinlto-samplepgo-defaults.prof' \ ; RUN: -passes='thinlto' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-OSZ,CHECK-O23SZ,%llvmcheckext +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O23SZ,%llvmcheckext ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager -debug-info-for-profiling \ ; RUN: -pgo-kind=pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-thinlto-samplepgo-defaults.prof' \ ; RUN: -passes='thinlto' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O123,CHECK-O2,CHECK-O23SZ,%llvmcheckext +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,%llvmcheckext ; Suppress FileCheck --allow-unused-prefixes=false diagnostics. 
; CHECK-NOEXT: {{^}} @@ -51,7 +51,6 @@ ; CHECK-O-NEXT: Running pass: OpenMPOptPass ; CHECK-O-NEXT: Running pass: LowerTypeTestsPass ; CHECK-O-NEXT: Running pass: IPSCCPPass -; CHECK-O123-NEXT: Running analysis: LoopAnalysis on foo ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running pass: GlobalOptPass ; CHECK-O-NEXT: Running pass: PromotePass @@ -63,7 +62,7 @@ ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: BlockFrequencyAnalysis on foo ; CHECK-O-NEXT: Running analysis: BranchProbabilityAnalysis on foo -; CHECK-OSZ-NEXT: Running analysis: LoopAnalysis on foo +; CHECK-O-NEXT: Running analysis: LoopAnalysis on foo ; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis on foo ; CHECK-O-NEXT: Running pass: SimplifyCFGPass on foo ; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass diff --git a/llvm/test/Transforms/Attributor/nofpclass.ll b/llvm/test/Transforms/Attributor/nofpclass.ll --- a/llvm/test/Transforms/Attributor/nofpclass.ll +++ b/llvm/test/Transforms/Attributor/nofpclass.ll @@ -739,6 +739,127 @@ ret float %fabs } +define float @returned_fabs_nopos(float nofpclass(psub pnorm pinf) %x) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @returned_fabs_nopos +; CHECK-SAME: (float nofpclass(pinf psub pnorm) [[X:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[FABS:%.*]] = call nofpclass(ninf nzero nsub nnorm) float @llvm.fabs.f32(float nofpclass(pinf psub pnorm) [[X]]) #[[ATTR13]] +; CHECK-NEXT: ret float [[FABS]] +; + %fabs = call float @llvm.fabs.f32(float %x) + ret float %fabs +} + +define float @returned_fabs_nopos_nopzero(float nofpclass(psub pnorm pinf pzero) %x) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @returned_fabs_nopos_nopzero +; CHECK-SAME: (float nofpclass(pinf pzero psub pnorm) 
[[X:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[FABS:%.*]] = call nofpclass(ninf nzero nsub nnorm) float @llvm.fabs.f32(float nofpclass(pinf pzero psub pnorm) [[X]]) #[[ATTR13]] +; CHECK-NEXT: ret float [[FABS]] +; + %fabs = call float @llvm.fabs.f32(float %x) + ret float %fabs +} + +define float @returned_fabs_nopos_nozero(float nofpclass(psub pnorm pinf zero) %x) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(ninf zero nsub nnorm) float @returned_fabs_nopos_nozero +; CHECK-SAME: (float nofpclass(pinf zero psub pnorm) [[X:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[FABS:%.*]] = call nofpclass(ninf zero nsub nnorm) float @llvm.fabs.f32(float nofpclass(pinf zero psub pnorm) [[X]]) #[[ATTR13]] +; CHECK-NEXT: ret float [[FABS]] +; + %fabs = call float @llvm.fabs.f32(float %x) + ret float %fabs +} + +define float @returned_fabs_nopos_nonan(float nofpclass(psub pnorm pinf nan) %x) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(nan ninf nzero nsub nnorm) float @returned_fabs_nopos_nonan +; CHECK-SAME: (float nofpclass(nan pinf psub pnorm) [[X:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[FABS:%.*]] = call nofpclass(nan ninf nzero nsub nnorm) float @llvm.fabs.f32(float nofpclass(nan pinf psub pnorm) [[X]]) #[[ATTR13]] +; CHECK-NEXT: ret float [[FABS]] +; + %fabs = call float @llvm.fabs.f32(float %x) + ret float %fabs +} + +define float @returned_fabs_noneg(float nofpclass(nsub nnorm ninf) %x) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @returned_fabs_noneg +; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[FABS:%.*]] = call nofpclass(ninf nzero nsub nnorm) float @llvm.fabs.f32(float nofpclass(ninf nsub nnorm) [[X]]) #[[ATTR13]] +; CHECK-NEXT: ret float [[FABS]] +; + %fabs = call float @llvm.fabs.f32(float %x) + ret 
float %fabs +} + +define float @returned_fabs_noneg_nonzero(float nofpclass(nsub nnorm ninf nzero) %x) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @returned_fabs_noneg_nonzero +; CHECK-SAME: (float nofpclass(ninf nzero nsub nnorm) [[X:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[FABS:%.*]] = call nofpclass(ninf nzero nsub nnorm) float @llvm.fabs.f32(float nofpclass(ninf nzero nsub nnorm) [[X]]) #[[ATTR13]] +; CHECK-NEXT: ret float [[FABS]] +; + %fabs = call float @llvm.fabs.f32(float %x) + ret float %fabs +} + +define float @returned_fabs_noneg_nozero(float nofpclass(nsub nnorm ninf zero) %x) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(ninf zero nsub nnorm) float @returned_fabs_noneg_nozero +; CHECK-SAME: (float nofpclass(ninf zero nsub nnorm) [[X:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[FABS:%.*]] = call nofpclass(ninf zero nsub nnorm) float @llvm.fabs.f32(float nofpclass(ninf zero nsub nnorm) [[X]]) #[[ATTR13]] +; CHECK-NEXT: ret float [[FABS]] +; + %fabs = call float @llvm.fabs.f32(float %x) + ret float %fabs +} + +define float @returned_fabs_noneg_nonan(float nofpclass(nsub nnorm ninf nan) %x) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(nan ninf nzero nsub nnorm) float @returned_fabs_noneg_nonan +; CHECK-SAME: (float nofpclass(nan ninf nsub nnorm) [[X:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[FABS:%.*]] = call nofpclass(nan ninf nzero nsub nnorm) float @llvm.fabs.f32(float nofpclass(nan ninf nsub nnorm) [[X]]) #[[ATTR13]] +; CHECK-NEXT: ret float [[FABS]] +; + %fabs = call float @llvm.fabs.f32(float %x) + ret float %fabs +} + +define float @returned_fabs_nonsub_nopnorm_nonzero(float nofpclass(nsub pnorm nzero) %x) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define 
nofpclass(ninf nzero nsub nnorm) float @returned_fabs_nonsub_nopnorm_nonzero +; CHECK-SAME: (float nofpclass(nzero nsub pnorm) [[X:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[FABS:%.*]] = call nofpclass(ninf nzero nsub nnorm) float @llvm.fabs.f32(float nofpclass(nzero nsub pnorm) [[X]]) #[[ATTR13]] +; CHECK-NEXT: ret float [[FABS]] +; + %fabs = call float @llvm.fabs.f32(float %x) + ret float %fabs +} + +define float @returned_fabs_nopsub_nonnorm_nopzero(float nofpclass(psub nnorm pzero) %x) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @returned_fabs_nopsub_nonnorm_nopzero +; CHECK-SAME: (float nofpclass(pzero psub nnorm) [[X:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[FABS:%.*]] = call nofpclass(ninf nzero nsub nnorm) float @llvm.fabs.f32(float nofpclass(pzero psub nnorm) [[X]]) #[[ATTR13]] +; CHECK-NEXT: ret float [[FABS]] +; + %fabs = call float @llvm.fabs.f32(float %x) + ret float %fabs +} + +define float @returned_fabs_nonnorm_nozero(float nofpclass(nnorm nzero) %x) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @returned_fabs_nonnorm_nozero +; CHECK-SAME: (float nofpclass(nzero nnorm) [[X:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[FABS:%.*]] = call nofpclass(ninf nzero nsub nnorm) float @llvm.fabs.f32(float nofpclass(nzero nnorm) [[X]]) #[[ATTR13]] +; CHECK-NEXT: ret float [[FABS]] +; + %fabs = call float @llvm.fabs.f32(float %x) + ret float %fabs +} + define float @returned_fneg(float %x) { ; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define float @returned_fneg @@ -938,9 +1059,9 @@ define float @returned_fneg_fabs_nopos(float nofpclass(pinf psub pnorm pzero) %x) { ; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) -; CHECK-LABEL: define nofpclass(inf zero sub norm) float @returned_fneg_fabs_nopos 
+; CHECK-LABEL: define nofpclass(pinf pzero psub pnorm) float @returned_fneg_fabs_nopos ; CHECK-SAME: (float nofpclass(pinf pzero psub pnorm) [[X:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[FABS:%.*]] = call nofpclass(inf zero sub norm) float @llvm.fabs.f32(float nofpclass(pinf pzero psub pnorm) [[X]]) #[[ATTR13]] +; CHECK-NEXT: [[FABS:%.*]] = call nofpclass(ninf nzero nsub nnorm) float @llvm.fabs.f32(float nofpclass(pinf pzero psub pnorm) [[X]]) #[[ATTR13]] ; CHECK-NEXT: [[FNEG_FABS:%.*]] = fneg float [[FABS]] ; CHECK-NEXT: ret float [[FNEG_FABS]] ; @@ -951,9 +1072,9 @@ define float @returned_fneg_fabs_mixed(float nofpclass(psub nnorm nzero qnan ninf) %x) { ; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) -; CHECK-LABEL: define nofpclass(qnan pinf pzero sub pnorm) float @returned_fneg_fabs_mixed +; CHECK-LABEL: define nofpclass(qnan pinf pzero psub pnorm) float @returned_fneg_fabs_mixed ; CHECK-SAME: (float nofpclass(qnan ninf nzero psub nnorm) [[X:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[FABS:%.*]] = call nofpclass(qnan ninf nzero sub nnorm) float @llvm.fabs.f32(float nofpclass(qnan ninf nzero psub nnorm) [[X]]) #[[ATTR13]] +; CHECK-NEXT: [[FABS:%.*]] = call nofpclass(qnan ninf nzero nsub nnorm) float @llvm.fabs.f32(float nofpclass(qnan ninf nzero psub nnorm) [[X]]) #[[ATTR13]] ; CHECK-NEXT: [[FNEG_FABS:%.*]] = fneg float [[FABS]] ; CHECK-NEXT: ret float [[FNEG_FABS]] ; diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-always-inline.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-always-inline.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-always-inline.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-always-inline.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes="ipsccp" -funcspec-avg-loop-iters=3 -funcspec-min-function-size=10 -S < %s | FileCheck %s +; RUN: opt -passes="ipsccp" -force-specialization -S < %s | FileCheck %s ; CHECK-NOT: 
foo.{{[0-9]+}} diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-integers.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-integers.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-integers.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-integers.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes="ipsccp" -funcspec-for-literal-constant=true -funcspec-min-function-size=10 -S < %s | FileCheck %s +; RUN: opt -passes="ipsccp" -funcspec-for-literal-constant=true -force-specialization -S < %s | FileCheck %s ; Check that the literal constant parameter could be specialized. ; CHECK: @foo.1( diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-loop.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-loop.ll deleted file mode 100644 --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-loop.ll +++ /dev/null @@ -1,63 +0,0 @@ -; RUN: opt -passes="ipsccp" -funcspec-avg-loop-iters=5 -funcspec-min-function-size=10 -S < %s | FileCheck %s - -; Check that the loop depth results in a larger specialization bonus. 
-; CHECK: @foo.1( -; CHECK: @foo.2( - -target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" - -@A = external dso_local constant i32, align 4 -@B = external dso_local constant i32, align 4 -@C = external dso_local constant i32, align 4 -@D = external dso_local constant i32, align 4 - -declare i1 @cond_begin() -declare i1 @cond_end() -declare i1 @getCond() - -define internal i32 @foo(i32 %x, ptr %b, ptr %c) { -entry: - br label %loop.entry - -loop.entry: - br label %loop2.entry - -loop2.entry: - br label %loop2.body - -loop2.body: - %0 = load i32, ptr %b, align 4 - %1 = load i32, ptr %c, align 4 - %add.0 = add nsw i32 %0, %1 - %add = add nsw i32 %add.0, %x - br label %loop2.end - -loop2.end: - %cond.end = call i1 @cond_end() - br i1 %cond.end, label %loop2.entry, label %loop.end - -loop.end: - %cond2.end = call i1 @getCond() - br i1 %cond2.end, label %loop.entry, label %return - -return: - ret i32 %add -} - -define dso_local i32 @bar(i32 %x, i32 %y) { -entry: - %tobool = icmp ne i32 %x, 0 - br i1 %tobool, label %if.then, label %if.else - -if.then: - %call = call i32 @foo(i32 %x, ptr @A, ptr @C) - br label %return - -if.else: - %call1 = call i32 @foo(i32 %y, ptr @B, ptr @D) - br label %return - -return: - %retval.0 = phi i32 [ %call, %if.then ], [ %call1, %if.else ] - ret i32 %retval.0 -} diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll @@ -2,7 +2,6 @@ ; RUN: opt -passes="ipsccp,deadargelim" -force-specialization -S < %s | FileCheck %s ; RUN: opt -passes="ipsccp,deadargelim" -funcspec-max-iters=1 -force-specialization -S < %s | FileCheck %s ; RUN: opt -passes="ipsccp,deadargelim" -funcspec-max-iters=0 -force-specialization -S < %s | FileCheck %s --check-prefix=DISABLED -; RUN: opt 
-passes="ipsccp,deadargelim" -funcspec-avg-loop-iters=1 -force-specialization -S < %s | FileCheck %s ; DISABLED-NOT: @func.1( ; DISABLED-NOT: @func.2( diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization3.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization3.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization3.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization3.ll @@ -1,9 +1,7 @@ -; RUN: opt -passes="ipsccp" -funcspec-avg-loop-iters=3 -S < %s | \ +; RUN: opt -passes="ipsccp" -S < %s | \ ; RUN: FileCheck %s --check-prefixes=COMMON,DISABLED ; RUN: opt -passes="ipsccp" -force-specialization -S < %s | \ ; RUN: FileCheck %s --check-prefixes=COMMON,FORCE -; RUN: opt -passes="ipsccp" -funcspec-avg-loop-iters=3 -force-specialization -S < %s | \ -; RUN: FileCheck %s --check-prefixes=COMMON,FORCE ; Test for specializing a constant global. diff --git a/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll b/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll --- a/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll +++ b/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll @@ -71,3 +71,233 @@ bb12: ; preds = %bb10, %bb9 ret void } + +define half @diff_types_same_width_merge(i1 %cond, half %a, i16 %b) { +; CHECK-LABEL: @diff_types_same_width_merge( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB0:%.*]], label [[BB1:%.*]] +; CHECK: BB0: +; CHECK-NEXT: br label [[SINK:%.*]] +; CHECK: BB1: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16 [[B:%.*]] to half +; CHECK-NEXT: br label [[SINK]] +; CHECK: sink: +; CHECK-NEXT: [[STOREMERGE:%.*]] = phi half [ [[TMP0]], [[BB1]] ], [ [[A:%.*]], [[BB0]] ] +; CHECK-NEXT: ret half [[STOREMERGE]] +; +entry: + %alloca = alloca half + br i1 %cond, label %BB0, label %BB1 +BB0: + store half %a, ptr %alloca + br label %sink +BB1: + store i16 %b, ptr 
%alloca + br label %sink +sink: + %val = load half, ptr %alloca + ret half %val +} + +define i32 @diff_types_diff_width_no_merge(i1 %cond, i32 %a, i64 %b) { +; CHECK-LABEL: @diff_types_diff_width_no_merge( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i64, align 8 +; CHECK-NEXT: br i1 [[COND:%.*]], label [[A:%.*]], label [[B:%.*]] +; CHECK: A: +; CHECK-NEXT: store i32 [[A:%.*]], ptr [[ALLOCA]], align 8 +; CHECK-NEXT: br label [[SINK:%.*]] +; CHECK: B: +; CHECK-NEXT: store i64 [[B:%.*]], ptr [[ALLOCA]], align 8 +; CHECK-NEXT: br label [[SINK]] +; CHECK: sink: +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ALLOCA]], align 8 +; CHECK-NEXT: ret i32 [[VAL]] +; +entry: + %alloca = alloca i64 + br i1 %cond, label %A, label %B +A: + store i32 %a, ptr %alloca + br label %sink +B: + store i64 %b, ptr %alloca + br label %sink +sink: + %val = load i32, ptr %alloca + ret i32 %val +} + +define <4 x i32> @vec_no_merge(i1 %cond, <2 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: @vec_no_merge( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i64, align 16 +; CHECK-NEXT: br i1 [[COND:%.*]], label [[A:%.*]], label [[B:%.*]] +; CHECK: A: +; CHECK-NEXT: store <2 x i32> [[A:%.*]], ptr [[ALLOCA]], align 16 +; CHECK-NEXT: br label [[SINK:%.*]] +; CHECK: B: +; CHECK-NEXT: store <4 x i32> [[B:%.*]], ptr [[ALLOCA]], align 16 +; CHECK-NEXT: br label [[SINK]] +; CHECK: sink: +; CHECK-NEXT: [[VAL:%.*]] = load <4 x i32>, ptr [[ALLOCA]], align 16 +; CHECK-NEXT: ret <4 x i32> [[VAL]] +; +entry: + %alloca = alloca i64 + br i1 %cond, label %A, label %B +A: + store <2 x i32> %a, ptr %alloca + br label %sink +B: + store <4 x i32> %b, ptr %alloca + br label %sink +sink: + %val = load <4 x i32>, ptr %alloca + ret <4 x i32> %val +} + +%struct.half = type { half }; + +define %struct.half @one_elem_struct_merge(i1 %cond, %struct.half %a, half %b) { +; CHECK-LABEL: @one_elem_struct_merge( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB0:%.*]], label [[BB1:%.*]] +; 
CHECK: BB0: +; CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_HALF:%.*]] [[A:%.*]], 0 +; CHECK-NEXT: br label [[SINK:%.*]] +; CHECK: BB1: +; CHECK-NEXT: br label [[SINK]] +; CHECK: sink: +; CHECK-NEXT: [[STOREMERGE:%.*]] = phi half [ [[TMP0]], [[BB0]] ], [ [[B:%.*]], [[BB1]] ] +; CHECK-NEXT: [[VAL1:%.*]] = insertvalue [[STRUCT_HALF]] poison, half [[STOREMERGE]], 0 +; CHECK-NEXT: ret [[STRUCT_HALF]] [[VAL1]] +; +entry: + %alloca = alloca i64 + br i1 %cond, label %BB0, label %BB1 +BB0: + store %struct.half %a, ptr %alloca + br label %sink +BB1: + store half %b, ptr %alloca + br label %sink +sink: + %val = load %struct.half, ptr %alloca + ret %struct.half %val +} + +%struct.tup = type { half, i32 }; + +define %struct.tup @multi_elem_struct_no_merge(i1 %cond, %struct.tup %a, half %b) { +; CHECK-LABEL: @multi_elem_struct_no_merge( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i64, align 8 +; CHECK-NEXT: br i1 [[COND:%.*]], label [[A:%.*]], label [[B:%.*]] +; CHECK: A: +; CHECK-NEXT: store [[STRUCT_TUP:%.*]] [[A:%.*]], ptr [[ALLOCA]], align 8 +; CHECK-NEXT: br label [[SINK:%.*]] +; CHECK: B: +; CHECK-NEXT: store half [[B:%.*]], ptr [[ALLOCA]], align 8 +; CHECK-NEXT: br label [[SINK]] +; CHECK: sink: +; CHECK-NEXT: [[VAL:%.*]] = load [[STRUCT_TUP]], ptr [[ALLOCA]], align 8 +; CHECK-NEXT: ret [[STRUCT_TUP]] [[VAL]] +; +entry: + %alloca = alloca i64 + br i1 %cond, label %A, label %B +A: + store %struct.tup %a, ptr %alloca + br label %sink +B: + store half %b, ptr %alloca + br label %sink +sink: + %val = load %struct.tup, ptr %alloca + ret %struct.tup %val +} + +define i16 @same_types_diff_align_no_merge(i1 %cond, i16 %a, i16 %b) { +; CHECK-LABEL: @same_types_diff_align_no_merge( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i16, align 4 +; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB0:%.*]], label [[BB1:%.*]] +; CHECK: BB0: +; CHECK-NEXT: store i16 [[A:%.*]], ptr [[ALLOCA]], align 8 +; CHECK-NEXT: br label [[SINK:%.*]] +; CHECK: BB1: +; 
CHECK-NEXT: store i16 [[B:%.*]], ptr [[ALLOCA]], align 4 +; CHECK-NEXT: br label [[SINK]] +; CHECK: sink: +; CHECK-NEXT: [[VAL:%.*]] = load i16, ptr [[ALLOCA]], align 4 +; CHECK-NEXT: ret i16 [[VAL]] +; +entry: + %alloca = alloca i16, align 4 + br i1 %cond, label %BB0, label %BB1 +BB0: + store i16 %a, ptr %alloca, align 8 + br label %sink +BB1: + store i16 %b, ptr %alloca, align 4 + br label %sink +sink: + %val = load i16, ptr %alloca + ret i16 %val +} + +define i64 @ptrtoint_merge(i1 %cond, i64 %a, ptr %b) { +; CHECK-LABEL: @ptrtoint_merge( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB0:%.*]], label [[BB1:%.*]] +; CHECK: BB0: +; CHECK-NEXT: br label [[SINK:%.*]] +; CHECK: BB1: +; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[B:%.*]] to i64 +; CHECK-NEXT: br label [[SINK]] +; CHECK: sink: +; CHECK-NEXT: [[STOREMERGE:%.*]] = phi i64 [ [[A:%.*]], [[BB0]] ], [ [[TMP0]], [[BB1]] ] +; CHECK-NEXT: ret i64 [[STOREMERGE]] +; +entry: + %alloca = alloca ptr + br i1 %cond, label %BB0, label %BB1 +BB0: + store i64 %a, ptr %alloca + br label %sink +BB1: + store ptr %b, ptr %alloca + br label %sink +sink: + %val = load i64, ptr %alloca + ret i64 %val +} + +define ptr @inttoptr_merge(i1 %cond, i64 %a, ptr %b) { +; CHECK-LABEL: define ptr @inttoptr_merge +; CHECK-SAME: (i1 [[COND:%.*]], i64 [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[COND]], label [[BB0:%.*]], label [[BB1:%.*]] +; CHECK: BB0: +; CHECK-NEXT: [[TMP0:%.*]] = inttoptr i64 [[A]] to ptr +; CHECK-NEXT: br label [[SINK:%.*]] +; CHECK: BB1: +; CHECK-NEXT: br label [[SINK]] +; CHECK: sink: +; CHECK-NEXT: [[STOREMERGE:%.*]] = phi ptr [ [[B]], [[BB1]] ], [ [[TMP0]], [[BB0]] ] +; CHECK-NEXT: ret ptr [[STOREMERGE]] +; +entry: + %alloca = alloca ptr + br i1 %cond, label %BB0, label %BB1 +BB0: + store i64 %a, ptr %alloca, align 8 + br label %sink +BB1: + store ptr %b, ptr %alloca, align 8 + br label %sink +sink: + %val = load ptr, ptr %alloca + ret ptr %val +} diff --git 
a/llvm/test/Transforms/InstSimplify/floating-point-compare.ll b/llvm/test/Transforms/InstSimplify/floating-point-compare.ll --- a/llvm/test/Transforms/InstSimplify/floating-point-compare.ll +++ b/llvm/test/Transforms/InstSimplify/floating-point-compare.ll @@ -1475,6 +1475,36 @@ ret i1 %r } +define i1 @ogt_zero_fabs_select_negone_or_pinf(i1 %cond) { +; CHECK-LABEL: @ogt_zero_fabs_select_negone_or_pinf( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND:%.*]], float -1.000000e+00, float 0x7FF0000000000000 +; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float [[SELECT]]) +; CHECK-NEXT: [[ONE:%.*]] = fcmp ogt float [[FABS]], 0.000000e+00 +; CHECK-NEXT: ret i1 [[ONE]] +; +entry: + %select = select i1 %cond, float -1.0, float 0x7FF0000000000000 + %fabs = call float @llvm.fabs.f32(float %select) + %one = fcmp ogt float %fabs, 0.0 + ret i1 %one +} + +define i1 @ogt_zero_fabs_select_one_or_ninf(i1 %cond) { +; CHECK-LABEL: @ogt_zero_fabs_select_one_or_ninf( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND:%.*]], float 1.000000e+00, float 0xFFF0000000000000 +; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float [[SELECT]]) +; CHECK-NEXT: [[ONE:%.*]] = fcmp ogt float [[FABS]], 0.000000e+00 +; CHECK-NEXT: ret i1 [[ONE]] +; +entry: + %select = select i1 %cond, float 1.0, float 0xFFF0000000000000 + %fabs = call float @llvm.fabs.f32(float %select) + %one = fcmp ogt float %fabs, 0.0 + ret i1 %one +} + declare <2 x double> @llvm.fabs.v2f64(<2 x double>) declare <2 x float> @llvm.fabs.v2f32(<2 x float>) declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) diff --git a/llvm/test/Transforms/InstSimplify/known-never-infinity.ll b/llvm/test/Transforms/InstSimplify/known-never-infinity.ll --- a/llvm/test/Transforms/InstSimplify/known-never-infinity.ll +++ b/llvm/test/Transforms/InstSimplify/known-never-infinity.ll @@ -1021,6 +1021,73 @@ ret i1 %cmp } +define i1 @not_inf_fabs_select_pzero_or_ninf(i1 %cond) { +; 
CHECK-LABEL: define i1 @not_inf_fabs_select_pzero_or_ninf +; CHECK-SAME: (i1 [[COND:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float 0.000000e+00, float 0xFFF0000000000000 +; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float [[SELECT]]) +; CHECK-NEXT: [[ONE:%.*]] = fcmp one float [[FABS]], 0x7FF0000000000000 +; CHECK-NEXT: ret i1 [[ONE]] +; +entry: + %select = select i1 %cond, float 0.000000e+00, float 0xFFF0000000000000 + %fabs = call float @llvm.fabs.f32(float %select) + %one = fcmp one float %fabs, 0x7FF0000000000000 + ret i1 %one +} + +define i1 @not_inf_fabs_select_nzero_or_pinf(i1 %cond) { +; CHECK-LABEL: define i1 @not_inf_fabs_select_nzero_or_pinf +; CHECK-SAME: (i1 [[COND:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float -0.000000e+00, float 0x7FF0000000000000 +; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float [[SELECT]]) +; CHECK-NEXT: [[ONE:%.*]] = fcmp one float [[FABS]], 0x7FF0000000000000 +; CHECK-NEXT: ret i1 [[ONE]] +; +entry: + %select = select i1 %cond, float -0.000000e+00, float 0x7FF0000000000000 + %fabs = call float @llvm.fabs.f32(float %select) + %one = fcmp one float %fabs, 0x7FF0000000000000 + ret i1 %one +} + +define i1 @not_ninf_fabs_select_nzero_or_pinf(i1 %cond) { +; CHECK-LABEL: define i1 @not_ninf_fabs_select_nzero_or_pinf +; CHECK-SAME: (i1 [[COND:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float -0.000000e+00, float 0x7FF0000000000000 +; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float [[SELECT]]) +; CHECK-NEXT: [[ONE:%.*]] = fcmp one float [[FABS]], 0xFFF0000000000000 +; CHECK-NEXT: ret i1 [[ONE]] +; +entry: + %select = select i1 %cond, float -0.000000e+00, float 0x7FF0000000000000 + %fabs = call float @llvm.fabs.f32(float %select) + %one = fcmp one float %fabs, 0xFFF0000000000000 + ret i1 %one +} + +define i1 @not_ninf_fneg_fabs_select_nzero_or_pinf(i1 %cond) { +; CHECK-LABEL: define i1 
@not_ninf_fneg_fabs_select_nzero_or_pinf +; CHECK-SAME: (i1 [[COND:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float -0.000000e+00, float 0x7FF0000000000000 +; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float [[SELECT]]) +; CHECK-NEXT: [[FNEG_FABS:%.*]] = fneg float [[FABS]] +; CHECK-NEXT: [[ONE:%.*]] = fcmp one float [[FNEG_FABS]], 0xFFF0000000000000 +; CHECK-NEXT: ret i1 [[ONE]] +; +entry: + %select = select i1 %cond, float -0.000000e+00, float 0x7FF0000000000000 + %fabs = call float @llvm.fabs.f32(float %select) + %fneg.fabs = fneg float %fabs + %one = fcmp one float %fneg.fabs, 0xFFF0000000000000 + ret i1 %one +} + + declare double @llvm.arithmetic.fence.f64(double) declare double @llvm.canonicalize.f64(double) declare double @llvm.ceil.f64(double) @@ -1029,6 +1096,7 @@ declare double @llvm.exp2.f64(double) declare double @llvm.exp.f64(double) declare double @llvm.fabs.f64(double) +declare float @llvm.fabs.f32(float) declare double @llvm.floor.f64(double) declare double @llvm.fma.f64(double, double, double) declare double @llvm.fmuladd.f64(double, double, double) diff --git a/llvm/test/Transforms/InstSimplify/strictfp-sqrt-nonneg.ll b/llvm/test/Transforms/InstSimplify/strictfp-sqrt-nonneg.ll --- a/llvm/test/Transforms/InstSimplify/strictfp-sqrt-nonneg.ll +++ b/llvm/test/Transforms/InstSimplify/strictfp-sqrt-nonneg.ll @@ -81,15 +81,14 @@ ret float %sub } -; Test all the rounding modes. Exception handling shouldn't matter. +; Test all the rounding modes. Rounding mode and exception handling +; shouldn't matter. -; Negative test: should not fire due to rounding mode metadata. 
define float @nonneg_u_downward(i32 %a) #0 { ; CHECK-LABEL: @nonneg_u_downward( ; CHECK-NEXT: [[FPA:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[A:%.*]], metadata !"round.downward", metadata !"fpexcept.ignore") #[[ATTR0]] ; CHECK-NEXT: [[SQRA:%.*]] = call float @llvm.experimental.constrained.sqrt.f32(float [[FPA]], metadata !"round.downward", metadata !"fpexcept.ignore") #[[ATTR0]] -; CHECK-NEXT: [[SUB:%.*]] = call float @llvm.experimental.constrained.fsub.f32(float [[SQRA]], float -0.000000e+00, metadata !"round.downward", metadata !"fpexcept.ignore") #[[ATTR0]] -; CHECK-NEXT: ret float [[SUB]] +; CHECK-NEXT: ret float [[SQRA]] ; %fpa = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %a, metadata !"round.downward", metadata !"fpexcept.ignore") #0 %sqra = call float @llvm.experimental.constrained.sqrt.f32(float %fpa, metadata !"round.downward", metadata !"fpexcept.ignore") #0 @@ -97,13 +96,11 @@ ret float %sub } -; Negative test: should not fire due to rounding mode metadata. 
define float @nonneg_s_downward(i32 %a) #0 { ; CHECK-LABEL: @nonneg_s_downward( ; CHECK-NEXT: [[FPA:%.*]] = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 [[A:%.*]], metadata !"round.downward", metadata !"fpexcept.ignore") #[[ATTR0]] ; CHECK-NEXT: [[SQRA:%.*]] = call float @llvm.experimental.constrained.sqrt.f32(float [[FPA]], metadata !"round.downward", metadata !"fpexcept.ignore") #[[ATTR0]] -; CHECK-NEXT: [[SUB:%.*]] = call float @llvm.experimental.constrained.fsub.f32(float [[SQRA]], float -0.000000e+00, metadata !"round.downward", metadata !"fpexcept.ignore") #[[ATTR0]] -; CHECK-NEXT: ret float [[SUB]] +; CHECK-NEXT: ret float [[SQRA]] ; %fpa = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %a, metadata !"round.downward", metadata !"fpexcept.ignore") #0 %sqra = call float @llvm.experimental.constrained.sqrt.f32(float %fpa, metadata !"round.downward", metadata !"fpexcept.ignore") #0 @@ -111,13 +108,11 @@ ret float %sub } -; Negative test: should not fire due to rounding mode metadata. 
define float @nonneg_u_upward(i32 %a) #0 { ; CHECK-LABEL: @nonneg_u_upward( ; CHECK-NEXT: [[FPA:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[A:%.*]], metadata !"round.upward", metadata !"fpexcept.ignore") #[[ATTR0]] ; CHECK-NEXT: [[SQRA:%.*]] = call float @llvm.experimental.constrained.sqrt.f32(float [[FPA]], metadata !"round.upward", metadata !"fpexcept.ignore") #[[ATTR0]] -; CHECK-NEXT: [[SUB:%.*]] = call float @llvm.experimental.constrained.fsub.f32(float [[SQRA]], float -0.000000e+00, metadata !"round.upward", metadata !"fpexcept.ignore") #[[ATTR0]] -; CHECK-NEXT: ret float [[SUB]] +; CHECK-NEXT: ret float [[SQRA]] ; %fpa = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %a, metadata !"round.upward", metadata !"fpexcept.ignore") #0 %sqra = call float @llvm.experimental.constrained.sqrt.f32(float %fpa, metadata !"round.upward", metadata !"fpexcept.ignore") #0 @@ -125,13 +120,11 @@ ret float %sub } -; Negative test: should not fire due to rounding mode metadata. 
define float @nonneg_s_upward(i32 %a) #0 { ; CHECK-LABEL: @nonneg_s_upward( ; CHECK-NEXT: [[FPA:%.*]] = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 [[A:%.*]], metadata !"round.upward", metadata !"fpexcept.ignore") #[[ATTR0]] ; CHECK-NEXT: [[SQRA:%.*]] = call float @llvm.experimental.constrained.sqrt.f32(float [[FPA]], metadata !"round.upward", metadata !"fpexcept.ignore") #[[ATTR0]] -; CHECK-NEXT: [[SUB:%.*]] = call float @llvm.experimental.constrained.fsub.f32(float [[SQRA]], float -0.000000e+00, metadata !"round.upward", metadata !"fpexcept.ignore") #[[ATTR0]] -; CHECK-NEXT: ret float [[SUB]] +; CHECK-NEXT: ret float [[SQRA]] ; %fpa = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %a, metadata !"round.upward", metadata !"fpexcept.ignore") #0 %sqra = call float @llvm.experimental.constrained.sqrt.f32(float %fpa, metadata !"round.upward", metadata !"fpexcept.ignore") #0 @@ -139,13 +132,11 @@ ret float %sub } -; Negative test: should not fire due to rounding mode metadata. 
define float @nonneg_u_towardzero(i32 %a) #0 { ; CHECK-LABEL: @nonneg_u_towardzero( ; CHECK-NEXT: [[FPA:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[A:%.*]], metadata !"round.towardzero", metadata !"fpexcept.ignore") #[[ATTR0]] ; CHECK-NEXT: [[SQRA:%.*]] = call float @llvm.experimental.constrained.sqrt.f32(float [[FPA]], metadata !"round.towardzero", metadata !"fpexcept.ignore") #[[ATTR0]] -; CHECK-NEXT: [[SUB:%.*]] = call float @llvm.experimental.constrained.fsub.f32(float [[SQRA]], float -0.000000e+00, metadata !"round.towardzero", metadata !"fpexcept.ignore") #[[ATTR0]] -; CHECK-NEXT: ret float [[SUB]] +; CHECK-NEXT: ret float [[SQRA]] ; %fpa = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %a, metadata !"round.towardzero", metadata !"fpexcept.ignore") #0 %sqra = call float @llvm.experimental.constrained.sqrt.f32(float %fpa, metadata !"round.towardzero", metadata !"fpexcept.ignore") #0 @@ -153,13 +144,11 @@ ret float %sub } -; Negative test: should not fire due to rounding mode metadata. 
define float @nonneg_s_towardzero(i32 %a) #0 { ; CHECK-LABEL: @nonneg_s_towardzero( ; CHECK-NEXT: [[FPA:%.*]] = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 [[A:%.*]], metadata !"round.towardzero", metadata !"fpexcept.ignore") #[[ATTR0]] ; CHECK-NEXT: [[SQRA:%.*]] = call float @llvm.experimental.constrained.sqrt.f32(float [[FPA]], metadata !"round.towardzero", metadata !"fpexcept.ignore") #[[ATTR0]] -; CHECK-NEXT: [[SUB:%.*]] = call float @llvm.experimental.constrained.fsub.f32(float [[SQRA]], float -0.000000e+00, metadata !"round.towardzero", metadata !"fpexcept.ignore") #[[ATTR0]] -; CHECK-NEXT: ret float [[SUB]] +; CHECK-NEXT: ret float [[SQRA]] ; %fpa = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %a, metadata !"round.towardzero", metadata !"fpexcept.ignore") #0 %sqra = call float @llvm.experimental.constrained.sqrt.f32(float %fpa, metadata !"round.towardzero", metadata !"fpexcept.ignore") #0 @@ -167,13 +156,11 @@ ret float %sub } -; Negative test: should not fire due to rounding mode metadata. 
define float @nonneg_u_tonearestaway(i32 %a) #0 { ; CHECK-LABEL: @nonneg_u_tonearestaway( ; CHECK-NEXT: [[FPA:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[A:%.*]], metadata !"round.tonearestaway", metadata !"fpexcept.ignore") #[[ATTR0]] ; CHECK-NEXT: [[SQRA:%.*]] = call float @llvm.experimental.constrained.sqrt.f32(float [[FPA]], metadata !"round.tonearestaway", metadata !"fpexcept.ignore") #[[ATTR0]] -; CHECK-NEXT: [[SUB:%.*]] = call float @llvm.experimental.constrained.fsub.f32(float [[SQRA]], float -0.000000e+00, metadata !"round.tonearestaway", metadata !"fpexcept.ignore") #[[ATTR0]] -; CHECK-NEXT: ret float [[SUB]] +; CHECK-NEXT: ret float [[SQRA]] ; %fpa = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %a, metadata !"round.tonearestaway", metadata !"fpexcept.ignore") #0 %sqra = call float @llvm.experimental.constrained.sqrt.f32(float %fpa, metadata !"round.tonearestaway", metadata !"fpexcept.ignore") #0 @@ -181,13 +168,11 @@ ret float %sub } -; Negative test: should not fire due to rounding mode metadata. 
define float @nonneg_s_tonearestaway(i32 %a) #0 { ; CHECK-LABEL: @nonneg_s_tonearestaway( ; CHECK-NEXT: [[FPA:%.*]] = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 [[A:%.*]], metadata !"round.tonearestaway", metadata !"fpexcept.ignore") #[[ATTR0]] ; CHECK-NEXT: [[SQRA:%.*]] = call float @llvm.experimental.constrained.sqrt.f32(float [[FPA]], metadata !"round.tonearestaway", metadata !"fpexcept.ignore") #[[ATTR0]] -; CHECK-NEXT: [[SUB:%.*]] = call float @llvm.experimental.constrained.fsub.f32(float [[SQRA]], float -0.000000e+00, metadata !"round.tonearestaway", metadata !"fpexcept.ignore") #[[ATTR0]] -; CHECK-NEXT: ret float [[SUB]] +; CHECK-NEXT: ret float [[SQRA]] ; %fpa = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %a, metadata !"round.tonearestaway", metadata !"fpexcept.ignore") #0 %sqra = call float @llvm.experimental.constrained.sqrt.f32(float %fpa, metadata !"round.tonearestaway", metadata !"fpexcept.ignore") #0 @@ -195,13 +180,11 @@ ret float %sub } -; Negative test: should not fire due to rounding mode metadata. 
define float @nonneg_u_dynamic(i32 %a) #0 { ; CHECK-LABEL: @nonneg_u_dynamic( ; CHECK-NEXT: [[FPA:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[A:%.*]], metadata !"round.dynamic", metadata !"fpexcept.ignore") #[[ATTR0]] ; CHECK-NEXT: [[SQRA:%.*]] = call float @llvm.experimental.constrained.sqrt.f32(float [[FPA]], metadata !"round.dynamic", metadata !"fpexcept.ignore") #[[ATTR0]] -; CHECK-NEXT: [[SUB:%.*]] = call float @llvm.experimental.constrained.fsub.f32(float [[SQRA]], float -0.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.ignore") #[[ATTR0]] -; CHECK-NEXT: ret float [[SUB]] +; CHECK-NEXT: ret float [[SQRA]] ; %fpa = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 %sqra = call float @llvm.experimental.constrained.sqrt.f32(float %fpa, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 @@ -209,13 +192,11 @@ ret float %sub } -; Negative test: should not fire due to rounding mode metadata. 
define float @nonneg_s_dynamic(i32 %a) #0 { ; CHECK-LABEL: @nonneg_s_dynamic( ; CHECK-NEXT: [[FPA:%.*]] = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 [[A:%.*]], metadata !"round.dynamic", metadata !"fpexcept.ignore") #[[ATTR0]] ; CHECK-NEXT: [[SQRA:%.*]] = call float @llvm.experimental.constrained.sqrt.f32(float [[FPA]], metadata !"round.dynamic", metadata !"fpexcept.ignore") #[[ATTR0]] -; CHECK-NEXT: [[SUB:%.*]] = call float @llvm.experimental.constrained.fsub.f32(float [[SQRA]], float -0.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.ignore") #[[ATTR0]] -; CHECK-NEXT: ret float [[SUB]] +; CHECK-NEXT: ret float [[SQRA]] ; %fpa = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 %sqra = call float @llvm.experimental.constrained.sqrt.f32(float %fpa, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 diff --git a/llvm/test/Transforms/LICM/hoist-add-sub.ll b/llvm/test/Transforms/LICM/hoist-add-sub.ll --- a/llvm/test/Transforms/LICM/hoist-add-sub.ll +++ b/llvm/test/Transforms/LICM/hoist-add-sub.ll @@ -165,18 +165,18 @@ } -; TODO: x + iv < 4 ==> iv < 4 - x +; x + iv < 4 ==> iv < 4 - x define i32 @test_02(ptr %p, ptr %x_p, ptr %length_p) { ; CHECK-LABEL: define i32 @test_02 ; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG0]] ; CHECK-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[INVARIANT_OP:%.*]] = sub nsw i32 4, [[X]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] -; CHECK-NEXT: [[ARITH:%.*]] = add nsw i32 [[X]], [[IV]] -; CHECK-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[ARITH]], 4 +; CHECK-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[IV]], [[INVARIANT_OP]] ; CHECK-NEXT: br i1 [[X_CHECK]], label 
[[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]] ; CHECK: backedge: ; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]] @@ -391,18 +391,18 @@ ret i32 -2 } -; TODO: iv + x < 4 ==> iv < 4 - x +; iv + x < 4 ==> iv < 4 - x define i32 @test_04(ptr %p, ptr %x_p, ptr %length_p) { ; CHECK-LABEL: define i32 @test_04 ; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG0]] ; CHECK-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[INVARIANT_OP:%.*]] = sub nsw i32 4, [[X]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] -; CHECK-NEXT: [[ARITH:%.*]] = add nsw i32 [[IV]], [[X]] -; CHECK-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[ARITH]], 4 +; CHECK-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[IV]], [[INVARIANT_OP]] ; CHECK-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]] ; CHECK: backedge: ; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]] diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/getpointerschaincost.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/getpointerschaincost.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/getpointerschaincost.ll @@ -0,0 +1,101 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -S -mtriple=riscv64 -mattr=+v -riscv-v-slp-max-vf=0 -passes=slp-vectorizer -pass-remarks-output=%t < %s | FileCheck %s +; RUN: FileCheck --input-file=%t --check-prefix=YAML %s + +; Because all of these addresses are foldable, the scalar cost should be 0 when +; computing the pointers chain cost. 
+; +; TODO: These are currently costed as free because the indices are all constants, but we +; should check if the constants are actually foldable define void @f(ptr %dest, i64 %i) { +; CHECK-LABEL: define void @f +; CHECK-SAME: (ptr [[DEST:%.*]], i64 [[I:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = getelementptr i32, ptr [[DEST]], i32 0 +; CHECK-NEXT: store <4 x i32> , ptr [[P1]], align 4 +; CHECK-NEXT: ret void +; +entry: +; YAML: Pass: slp-vectorizer +; YAML-NEXT: Name: StoresVectorized +; YAML-NEXT: Function: f +; YAML-NEXT: Args: +; YAML-NEXT: - String: 'Stores SLP vectorized with cost ' +; YAML-NEXT: - Cost: '-2' +; YAML-NEXT: - String: ' and with tree size ' +; YAML-NEXT: - TreeSize: '2' + %p1 = getelementptr i32, ptr %dest, i32 0 + store i32 1, ptr %p1 + %p2 = getelementptr i32, ptr %dest, i32 1 + store i32 1, ptr %p2 + %p3 = getelementptr i32, ptr %dest, i32 2 + store i32 1, ptr %p3 + %p4 = getelementptr i32, ptr %dest, i32 3 + store i32 1, ptr %p4 + ret void +} + +; When computing the scalar pointers chain cost here, there is a cost of 1 for +; the base pointer, and the rest can be folded in, so the scalar cost should be +; 1. 
+; +; TODO: These are currently costed as free because the indices are all constants, but we +; should check if the constants are actually foldable define void @g(ptr %dest, i64 %i) { +; CHECK-LABEL: define void @g +; CHECK-SAME: (ptr [[DEST:%.*]], i64 [[I:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = getelementptr i32, ptr [[DEST]], i32 2048 +; CHECK-NEXT: store <4 x i32> , ptr [[P1]], align 4 +; CHECK-NEXT: ret void +; +entry: +; YAML: Pass: slp-vectorizer +; YAML-NEXT: Name: StoresVectorized +; YAML-NEXT: Function: g +; YAML-NEXT: Args: +; YAML-NEXT: - String: 'Stores SLP vectorized with cost ' +; YAML-NEXT: - Cost: '-2' +; YAML-NEXT: - String: ' and with tree size ' +; YAML-NEXT: - TreeSize: '2' + %p1 = getelementptr i32, ptr %dest, i32 2048 + store i32 1, ptr %p1 + %p2 = getelementptr i32, ptr %dest, i32 2049 + store i32 1, ptr %p2 + %p3 = getelementptr i32, ptr %dest, i32 2050 + store i32 1, ptr %p3 + %p4 = getelementptr i32, ptr %dest, i32 2051 + store i32 1, ptr %p4 + ret void +} + +; When computing the scalar pointers chain cost here, there is a cost of +; 1 for the base pointer, and the rest can be folded in, so the scalar cost +; should be 1. 
+define void @h(ptr %dest, i32 %i) { +; CHECK-LABEL: define void @h +; CHECK-SAME: (ptr [[DEST:%.*]], i32 [[I:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = getelementptr [4 x i32], ptr [[DEST]], i32 [[I]], i32 0 +; CHECK-NEXT: store <4 x i32> , ptr [[P1]], align 4 +; CHECK-NEXT: ret void +; +entry: +; YAML: Pass: slp-vectorizer +; YAML-NEXT: Name: StoresVectorized +; YAML-NEXT: Function: h +; YAML-NEXT: Args: +; YAML-NEXT: - String: 'Stores SLP vectorized with cost ' +; YAML-NEXT: - Cost: '-2' +; YAML-NEXT: - String: ' and with tree size ' +; YAML-NEXT: - TreeSize: '2' + %p1 = getelementptr [4 x i32], ptr %dest, i32 %i, i32 0 + store i32 1, ptr %p1 + %p2 = getelementptr [4 x i32], ptr %dest, i32 %i, i32 1 + store i32 1, ptr %p2 + %p3 = getelementptr [4 x i32], ptr %dest, i32 %i, i32 2 + store i32 1, ptr %p3 + %p4 = getelementptr [4 x i32], ptr %dest, i32 %i, i32 3 + store i32 1, ptr %p4 + ret void +} diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/struct-gep.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/struct-gep.ll --- a/llvm/test/Transforms/SLPVectorizer/RISCV/struct-gep.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/struct-gep.ll @@ -2,7 +2,9 @@ ; RUN: opt < %s -passes=slp-vectorizer -mtriple=riscv64 -mattr=+v \ ; RUN: -riscv-v-slp-max-vf=0 -S | FileCheck %s -; FIXME: This should not be vectorized +; This shouldn't be vectorized as the extra address computation required for the +; vector store make it unprofitable (vle/vse don't have an offset in their +; addressing modes) %struct.2i32 = type { i32, i32 } @@ -10,7 +12,9 @@ ; CHECK-LABEL: @splat_store_v2i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[P1:%.*]] = getelementptr [[STRUCT_2I32:%.*]], ptr [[DEST:%.*]], i64 [[I:%.*]], i32 0 -; CHECK-NEXT: store <2 x i32> , ptr [[P1]], align 4 +; CHECK-NEXT: store i32 1, ptr [[P1]], align 4 +; CHECK-NEXT: [[P2:%.*]] = getelementptr [[STRUCT_2I32]], ptr [[DEST]], i64 [[I]], i32 1 +; CHECK-NEXT: store i32 1, ptr [[P2]], align 4 ; CHECK-NEXT: ret 
void ; entry: diff --git a/llvm/test/tools/llvm-mca/RISCV/different-instruments.s b/llvm/test/tools/llvm-mca/RISCV/different-instruments.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/RISCV/different-instruments.s @@ -0,0 +1,76 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=riscv64 -mcpu=sifive-x280 -timeline -iterations=1 < %s | FileCheck %s + +vsetvli zero, a0, e8, m1, tu, mu +# LLVM-MCA-RISCV-LMUL M1 +vadd.vv v12, v12, v12 +vsetvli zero, a0, e8, m8, tu, mu +# LLVM-MCA-RISCV-LMUL M8 +vadd.vv v12, v12, v12 + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 4 +# CHECK-NEXT: Total Cycles: 12 +# CHECK-NEXT: Total uOps: 4 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.33 +# CHECK-NEXT: IPC: 0.33 +# CHECK-NEXT: Block RThroughput: 18.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 1.00 U vsetvli zero, a0, e8, m1, tu, mu +# CHECK-NEXT: 1 4 16.00 vadd.vv v12, v12, v12 +# CHECK-NEXT: 1 3 1.00 U vsetvli zero, a0, e8, m8, tu, mu +# CHECK-NEXT: 1 4 16.00 vadd.vv v12, v12, v12 + +# CHECK: Resources: +# CHECK-NEXT: [0] - SiFive7FDiv +# CHECK-NEXT: [1] - SiFive7IDiv +# CHECK-NEXT: [2] - SiFive7PipeA +# CHECK-NEXT: [3] - SiFive7PipeB +# CHECK-NEXT: [4] - SiFive7PipeV +# CHECK-NEXT: [5] - SiFive7VA +# CHECK-NEXT: [6] - SiFive7VL +# CHECK-NEXT: [7] - SiFive7VS + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] +# CHECK-NEXT: - - 2.00 - 18.00 18.00 - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions: +# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, a0, e8, m1, tu, mu +# CHECK-NEXT: - - - - 2.00 2.00 - - vadd.vv v12, v12, v12 +# CHECK-NEXT: - - 1.00 - - - - 
- vsetvli zero, a0, e8, m8, tu, mu +# CHECK-NEXT: - - - - 16.00 16.00 - - vadd.vv v12, v12, v12 + +# CHECK: Timeline view: +# CHECK-NEXT: 01 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeE . .. vsetvli zero, a0, e8, m1, tu, mu +# CHECK-NEXT: [0,1] . DeeeE .. vadd.vv v12, v12, v12 +# CHECK-NEXT: [0,2] . DeeE .. vsetvli zero, a0, e8, m8, tu, mu +# CHECK-NEXT: [0,3] . . DeeeE vadd.vv v12, v12, v12 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 0.0 0.0 0.0 vsetvli zero, a0, e8, m1, tu, mu +# CHECK-NEXT: 1. 1 0.0 0.0 0.0 vadd.vv v12, v12, v12 +# CHECK-NEXT: 2. 1 0.0 0.0 0.0 vsetvli zero, a0, e8, m8, tu, mu +# CHECK-NEXT: 3. 1 0.0 0.0 0.0 vadd.vv v12, v12, v12 +# CHECK-NEXT: 1 0.0 0.0 0.0 diff --git a/llvm/test/tools/llvm-mca/RISCV/disable-im.s b/llvm/test/tools/llvm-mca/RISCV/disable-im.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/RISCV/disable-im.s @@ -0,0 +1,87 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=riscv64 -mcpu=sifive-x280 -timeline -iterations=1 -disable-im < %s | FileCheck %s + +vsetvli zero, a0, e8, m2, tu, mu +# LLVM-MCA-RISCV-LMUL M2 +vadd.vv v12, v12, v12 +vsetvli zero, a0, e8, m1, tu, mu +# LLVM-MCA-RISCV-LMUL M1 +vadd.vv v12, v12, v12 +vsetvli zero, a0, e8, m8, tu, mu +# LLVM-MCA-RISCV-LMUL M8 +vadd.vv v12, v12, v12 + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 6 +# CHECK-NEXT: Total Cycles: 40 +# CHECK-NEXT: Total uOps: 6 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.15 +# CHECK-NEXT: IPC: 0.15 +# CHECK-NEXT: Block RThroughput: 48.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# 
CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 1.00 U vsetvli zero, a0, e8, m2, tu, mu +# CHECK-NEXT: 1 4 16.00 vadd.vv v12, v12, v12 +# CHECK-NEXT: 1 3 1.00 U vsetvli zero, a0, e8, m1, tu, mu +# CHECK-NEXT: 1 4 16.00 vadd.vv v12, v12, v12 +# CHECK-NEXT: 1 3 1.00 U vsetvli zero, a0, e8, m8, tu, mu +# CHECK-NEXT: 1 4 16.00 vadd.vv v12, v12, v12 + +# CHECK: Resources: +# CHECK-NEXT: [0] - SiFive7FDiv +# CHECK-NEXT: [1] - SiFive7IDiv +# CHECK-NEXT: [2] - SiFive7PipeA +# CHECK-NEXT: [3] - SiFive7PipeB +# CHECK-NEXT: [4] - SiFive7PipeV +# CHECK-NEXT: [5] - SiFive7VA +# CHECK-NEXT: [6] - SiFive7VL +# CHECK-NEXT: [7] - SiFive7VS + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] +# CHECK-NEXT: - - 3.00 - 48.00 48.00 - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions: +# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, a0, e8, m2, tu, mu +# CHECK-NEXT: - - - - 16.00 16.00 - - vadd.vv v12, v12, v12 +# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, a0, e8, m1, tu, mu +# CHECK-NEXT: - - - - 16.00 16.00 - - vadd.vv v12, v12, v12 +# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, a0, e8, m8, tu, mu +# CHECK-NEXT: - - - - 16.00 16.00 - - vadd.vv v12, v12, v12 + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 0123456789 +# CHECK-NEXT: Index 0123456789 0123456789 + +# CHECK: [0,0] DeeE . . . . . . . . vsetvli zero, a0, e8, m2, tu, mu +# CHECK-NEXT: [0,1] . DeeeE . . . . . . . vadd.vv v12, v12, v12 +# CHECK-NEXT: [0,2] . DeeE . . . . . . . vsetvli zero, a0, e8, m1, tu, mu +# CHECK-NEXT: [0,3] . . . . DeeeE . . . . vadd.vv v12, v12, v12 +# CHECK-NEXT: [0,4] . . . . DeeE . . . . vsetvli zero, a0, e8, m8, tu, mu +# CHECK-NEXT: [0,5] . . . . . . . 
DeeeE vadd.vv v12, v12, v12 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 0.0 0.0 0.0 vsetvli zero, a0, e8, m2, tu, mu +# CHECK-NEXT: 1. 1 0.0 0.0 0.0 vadd.vv v12, v12, v12 +# CHECK-NEXT: 2. 1 0.0 0.0 0.0 vsetvli zero, a0, e8, m1, tu, mu +# CHECK-NEXT: 3. 1 0.0 0.0 0.0 vadd.vv v12, v12, v12 +# CHECK-NEXT: 4. 1 0.0 0.0 0.0 vsetvli zero, a0, e8, m8, tu, mu +# CHECK-NEXT: 5. 1 0.0 0.0 0.0 vadd.vv v12, v12, v12 +# CHECK-NEXT: 1 0.0 0.0 0.0 diff --git a/llvm/test/tools/llvm-mca/RISCV/instrument-at-start.s b/llvm/test/tools/llvm-mca/RISCV/instrument-at-start.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/RISCV/instrument-at-start.s @@ -0,0 +1,64 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=riscv64 -mcpu=sifive-x280 -timeline -iterations=1 < %s | FileCheck %s + +vsetvli zero, a0, e8, m1, tu, mu +# LLVM-MCA-RISCV-LMUL M1 +vadd.vv v12, v12, v12 + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 2 +# CHECK-NEXT: Total Cycles: 8 +# CHECK-NEXT: Total uOps: 2 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.25 +# CHECK-NEXT: IPC: 0.25 +# CHECK-NEXT: Block RThroughput: 2.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 1.00 U vsetvli zero, a0, e8, m1, tu, mu +# CHECK-NEXT: 1 4 16.00 vadd.vv v12, v12, v12 + +# CHECK: Resources: +# CHECK-NEXT: [0] - SiFive7FDiv +# CHECK-NEXT: [1] - SiFive7IDiv +# CHECK-NEXT: [2] - SiFive7PipeA +# CHECK-NEXT: 
[3] - SiFive7PipeB +# CHECK-NEXT: [4] - SiFive7PipeV +# CHECK-NEXT: [5] - SiFive7VA +# CHECK-NEXT: [6] - SiFive7VL +# CHECK-NEXT: [7] - SiFive7VS + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] +# CHECK-NEXT: - - 1.00 - 2.00 2.00 - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions: +# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, a0, e8, m1, tu, mu +# CHECK-NEXT: - - - - 2.00 2.00 - - vadd.vv v12, v12, v12 + +# CHECK: Timeline view: +# CHECK-NEXT: Index 01234567 + +# CHECK: [0,0] DeeE . . vsetvli zero, a0, e8, m1, tu, mu +# CHECK-NEXT: [0,1] . DeeeE vadd.vv v12, v12, v12 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 0.0 0.0 0.0 vsetvli zero, a0, e8, m1, tu, mu +# CHECK-NEXT: 1. 
1 0.0 0.0 0.0 vadd.vv v12, v12, v12 +# CHECK-NEXT: 1 0.0 0.0 0.0 diff --git a/llvm/test/tools/llvm-mca/RISCV/instrument-in-middle.s b/llvm/test/tools/llvm-mca/RISCV/instrument-in-middle.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/RISCV/instrument-in-middle.s @@ -0,0 +1,70 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=riscv64 -mcpu=sifive-x280 -timeline -iterations=1 < %s | FileCheck %s + +vadd.vv v12, v12, v12 +vsetvli zero, a0, e8, m8, tu, mu +# LLVM-MCA-RISCV-LMUL MF8 +vadd.vv v12, v12, v12 + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 3 +# CHECK-NEXT: Total Cycles: 21 +# CHECK-NEXT: Total uOps: 3 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.14 +# CHECK-NEXT: IPC: 0.14 +# CHECK-NEXT: Block RThroughput: 17.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 4 16.00 vadd.vv v12, v12, v12 +# CHECK-NEXT: 1 3 1.00 U vsetvli zero, a0, e8, m8, tu, mu +# CHECK-NEXT: 1 4 16.00 vadd.vv v12, v12, v12 + +# CHECK: Resources: +# CHECK-NEXT: [0] - SiFive7FDiv +# CHECK-NEXT: [1] - SiFive7IDiv +# CHECK-NEXT: [2] - SiFive7PipeA +# CHECK-NEXT: [3] - SiFive7PipeB +# CHECK-NEXT: [4] - SiFive7PipeV +# CHECK-NEXT: [5] - SiFive7VA +# CHECK-NEXT: [6] - SiFive7VL +# CHECK-NEXT: [7] - SiFive7VS + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] +# CHECK-NEXT: - - 1.00 - 17.00 17.00 - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions: +# CHECK-NEXT: - - - - 16.00 16.00 - - vadd.vv v12, v12, v12 +# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, a0, e8, m8, tu, mu +# CHECK-NEXT: - - - - 1.00 1.00 - - vadd.vv v12, v12, v12 + +# CHECK: Timeline view: +# 
CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 0 + +# CHECK: [0,0] DeeeE. . . . vadd.vv v12, v12, v12 +# CHECK-NEXT: [0,1] .DeeE. . . . vsetvli zero, a0, e8, m8, tu, mu +# CHECK-NEXT: [0,2] . . . .DeeeE vadd.vv v12, v12, v12 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 0.0 0.0 0.0 vadd.vv v12, v12, v12 +# CHECK-NEXT: 1. 1 0.0 0.0 0.0 vsetvli zero, a0, e8, m8, tu, mu +# CHECK-NEXT: 2. 1 0.0 0.0 0.0 vadd.vv v12, v12, v12 +# CHECK-NEXT: 1 0.0 0.0 0.0 diff --git a/llvm/test/tools/llvm-mca/RISCV/instrument-in-region.s b/llvm/test/tools/llvm-mca/RISCV/instrument-in-region.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/RISCV/instrument-in-region.s @@ -0,0 +1,68 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=riscv64 -mcpu=sifive-x280 -timeline -iterations=1 < %s | FileCheck %s + +# LLVM-MCA-BEGIN foo +vsetvli zero, a0, e8, m1, tu, mu +# LLVM-MCA-RISCV-LMUL M1 +vadd.vv v12, v12, v12 +# LLVM-MCA-END foo + +# CHECK: [0] Code Region - foo + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 2 +# CHECK-NEXT: Total Cycles: 8 +# CHECK-NEXT: Total uOps: 2 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.25 +# CHECK-NEXT: IPC: 0.25 +# CHECK-NEXT: Block RThroughput: 2.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 1.00 U vsetvli zero, a0, e8, m1, tu, mu +# CHECK-NEXT: 1 4 16.00 vadd.vv v12, v12, v12 + +# CHECK: Resources: +# CHECK-NEXT: 
[0] - SiFive7FDiv +# CHECK-NEXT: [1] - SiFive7IDiv +# CHECK-NEXT: [2] - SiFive7PipeA +# CHECK-NEXT: [3] - SiFive7PipeB +# CHECK-NEXT: [4] - SiFive7PipeV +# CHECK-NEXT: [5] - SiFive7VA +# CHECK-NEXT: [6] - SiFive7VL +# CHECK-NEXT: [7] - SiFive7VS + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] +# CHECK-NEXT: - - 1.00 - 2.00 2.00 - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions: +# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, a0, e8, m1, tu, mu +# CHECK-NEXT: - - - - 2.00 2.00 - - vadd.vv v12, v12, v12 + +# CHECK: Timeline view: +# CHECK-NEXT: Index 01234567 + +# CHECK: [0,0] DeeE . . vsetvli zero, a0, e8, m1, tu, mu +# CHECK-NEXT: [0,1] . DeeeE vadd.vv v12, v12, v12 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 0.0 0.0 0.0 vsetvli zero, a0, e8, m1, tu, mu +# CHECK-NEXT: 1. 
1 0.0 0.0 0.0 vadd.vv v12, v12, v12 +# CHECK-NEXT: 1 0.0 0.0 0.0 diff --git a/llvm/test/tools/llvm-mca/RISCV/instrument-straddles-region.s b/llvm/test/tools/llvm-mca/RISCV/instrument-straddles-region.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/RISCV/instrument-straddles-region.s @@ -0,0 +1,69 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=riscv64 -mcpu=sifive-x280 -timeline -iterations=1 < %s | FileCheck %s + +# LLVM-MCA-BEGIN foo +vsetvli zero, a0, e8, m1, tu, mu +# LLVM-MCA-RISCV-LMUL M1 +vadd.vv v12, v12, v12 +# LLVM-MCA-END foo +vadd.vv v12, v12, v12 + +# CHECK: [0] Code Region - foo + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 2 +# CHECK-NEXT: Total Cycles: 8 +# CHECK-NEXT: Total uOps: 2 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.25 +# CHECK-NEXT: IPC: 0.25 +# CHECK-NEXT: Block RThroughput: 2.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 1.00 U vsetvli zero, a0, e8, m1, tu, mu +# CHECK-NEXT: 1 4 16.00 vadd.vv v12, v12, v12 + +# CHECK: Resources: +# CHECK-NEXT: [0] - SiFive7FDiv +# CHECK-NEXT: [1] - SiFive7IDiv +# CHECK-NEXT: [2] - SiFive7PipeA +# CHECK-NEXT: [3] - SiFive7PipeB +# CHECK-NEXT: [4] - SiFive7PipeV +# CHECK-NEXT: [5] - SiFive7VA +# CHECK-NEXT: [6] - SiFive7VL +# CHECK-NEXT: [7] - SiFive7VS + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] +# CHECK-NEXT: - - 1.00 - 2.00 2.00 - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions: +# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, a0, e8, m1, tu, mu +# CHECK-NEXT: - - - - 2.00 2.00 - - vadd.vv v12, v12, v12 + +# CHECK: Timeline view: +# CHECK-NEXT: Index 01234567 
+ +# CHECK: [0,0] DeeE . . vsetvli zero, a0, e8, m1, tu, mu +# CHECK-NEXT: [0,1] . DeeeE vadd.vv v12, v12, v12 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 0.0 0.0 0.0 vsetvli zero, a0, e8, m1, tu, mu +# CHECK-NEXT: 1. 1 0.0 0.0 0.0 vadd.vv v12, v12, v12 +# CHECK-NEXT: 1 0.0 0.0 0.0 diff --git a/llvm/test/tools/llvm-mca/RISCV/lit.local.cfg b/llvm/test/tools/llvm-mca/RISCV/lit.local.cfg new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/RISCV/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'RISCV' in config.root.targets: + config.unsupported = True diff --git a/llvm/test/tools/llvm-mca/RISCV/multiple-same-instruments.s b/llvm/test/tools/llvm-mca/RISCV/multiple-same-instruments.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/RISCV/multiple-same-instruments.s @@ -0,0 +1,97 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=riscv64 -mcpu=sifive-x280 -timeline -iterations=1 < %s | FileCheck %s + +vsetvli zero, a0, e8, m1, tu, mu +# LLVM-MCA-RISCV-LMUL M1 +vadd.vv v12, v12, v12 +vsetvli zero, a0, e8, m1, tu, mu +# LLVM-MCA-RISCV-LMUL M1 +vadd.vv v12, v12, v12 +vsub.vv v12, v12, v12 +vsetvli zero, a0, e8, m2, tu, mu +# LLVM-MCA-RISCV-LMUL M4 +vadd.vv v12, v12, v12 +vsub.vv v12, v12, v12 + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 8 +# CHECK-NEXT: Total Cycles: 28 +# CHECK-NEXT: Total uOps: 8 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 0.29 +# CHECK-NEXT: IPC: 0.29 +# CHECK-NEXT: Block RThroughput: 22.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# 
CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 1.00 U vsetvli zero, a0, e8, m1, tu, mu +# CHECK-NEXT: 1 4 16.00 vadd.vv v12, v12, v12 +# CHECK-NEXT: 1 3 1.00 U vsetvli zero, a0, e8, m1, tu, mu +# CHECK-NEXT: 1 4 16.00 vadd.vv v12, v12, v12 +# CHECK-NEXT: 1 4 16.00 vsub.vv v12, v12, v12 +# CHECK-NEXT: 1 3 1.00 U vsetvli zero, a0, e8, m2, tu, mu +# CHECK-NEXT: 1 4 16.00 vadd.vv v12, v12, v12 +# CHECK-NEXT: 1 4 16.00 vsub.vv v12, v12, v12 + +# CHECK: Resources: +# CHECK-NEXT: [0] - SiFive7FDiv +# CHECK-NEXT: [1] - SiFive7IDiv +# CHECK-NEXT: [2] - SiFive7PipeA +# CHECK-NEXT: [3] - SiFive7PipeB +# CHECK-NEXT: [4] - SiFive7PipeV +# CHECK-NEXT: [5] - SiFive7VA +# CHECK-NEXT: [6] - SiFive7VL +# CHECK-NEXT: [7] - SiFive7VS + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] +# CHECK-NEXT: - - 3.00 - 22.00 22.00 - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions: +# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, a0, e8, m1, tu, mu +# CHECK-NEXT: - - - - 2.00 2.00 - - vadd.vv v12, v12, v12 +# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, a0, e8, m1, tu, mu +# CHECK-NEXT: - - - - 2.00 2.00 - - vadd.vv v12, v12, v12 +# CHECK-NEXT: - - - - 2.00 2.00 - - vsub.vv v12, v12, v12 +# CHECK-NEXT: - - 1.00 - - - - - vsetvli zero, a0, e8, m2, tu, mu +# CHECK-NEXT: - - - - 8.00 8.00 - - vadd.vv v12, v12, v12 +# CHECK-NEXT: - - - - 8.00 8.00 - - vsub.vv v12, v12, v12 + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 01234567 + +# CHECK: [0,0] DeeE . . . . . . vsetvli zero, a0, e8, m1, tu, mu +# CHECK-NEXT: [0,1] . DeeeE . . . . . vadd.vv v12, v12, v12 +# CHECK-NEXT: [0,2] . DeeE . . . . . vsetvli zero, a0, e8, m1, tu, mu +# CHECK-NEXT: [0,3] . . DeeeE . . . . vadd.vv v12, v12, v12 +# CHECK-NEXT: [0,4] . . .DeeeE . . . vsub.vv v12, v12, v12 +# CHECK-NEXT: [0,5] . . . DeeE . . 
. vsetvli zero, a0, e8, m2, tu, mu +# CHECK-NEXT: [0,6] . . . DeeeE. . . vadd.vv v12, v12, v12 +# CHECK-NEXT: [0,7] . . . . . DeeeE vsub.vv v12, v12, v12 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 0.0 0.0 0.0 vsetvli zero, a0, e8, m1, tu, mu +# CHECK-NEXT: 1. 1 0.0 0.0 0.0 vadd.vv v12, v12, v12 +# CHECK-NEXT: 2. 1 0.0 0.0 0.0 vsetvli zero, a0, e8, m1, tu, mu +# CHECK-NEXT: 3. 1 0.0 0.0 0.0 vadd.vv v12, v12, v12 +# CHECK-NEXT: 4. 1 0.0 0.0 0.0 vsub.vv v12, v12, v12 +# CHECK-NEXT: 5. 1 0.0 0.0 0.0 vsetvli zero, a0, e8, m2, tu, mu +# CHECK-NEXT: 6. 1 0.0 0.0 0.0 vadd.vv v12, v12, v12 +# CHECK-NEXT: 7. 1 0.0 0.0 0.0 vsub.vv v12, v12, v12 +# CHECK-NEXT: 1 0.0 0.0 0.0 diff --git a/llvm/test/tools/llvm-mca/RISCV/riscv-instrument-no-data-is-err.s b/llvm/test/tools/llvm-mca/RISCV/riscv-instrument-no-data-is-err.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/RISCV/riscv-instrument-no-data-is-err.s @@ -0,0 +1,10 @@ +# RUN: not llvm-mca -mtriple=riscv64 -mcpu=sifive-x280 -iterations=1 < %s 2>&1 | FileCheck %s + +vsetvli zero, a0, e8, m1, tu, mu +# LLVM-MCA-RISCV-LMUL +vadd.vv v12, v12, v12 + +# CHECK: error: Failed to create RISCV-LMUL instrument with no data +# CHECK: # LLVM-MCA-RISCV-LMUL +# CHECK: ^ +# CHECK: error: There was an error parsing comments. 
diff --git a/llvm/test/tools/llvm-mca/RISCV/unknown-instrument-is-err.s b/llvm/test/tools/llvm-mca/RISCV/unknown-instrument-is-err.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/RISCV/unknown-instrument-is-err.s @@ -0,0 +1,10 @@ +# RUN: not llvm-mca -mtriple=riscv64 -mcpu=sifive-x280 -timeline -iterations=1 < %s 2>&1 | FileCheck %s + +# LLVM-MCA-UNKNOWN M1 +vsetvli zero, a0, e8, m1, tu, mu +vadd.vv v12, v12, v12 + +# CHECK: error: Unknown instrumentation type in LLVM-MCA comment: UNKNOWN +# CHECK: # LLVM-MCA-UNKNOWN M1 +# CHECK: ^ +# CHECK: error: There was an error parsing comments. diff --git a/llvm/test/tools/llvm-mca/RISCV/unknown-lmul-is-err.s b/llvm/test/tools/llvm-mca/RISCV/unknown-lmul-is-err.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/RISCV/unknown-lmul-is-err.s @@ -0,0 +1,10 @@ +# RUN: not llvm-mca -mtriple=riscv64 -mcpu=sifive-x280 -iterations=1 < %s 2>&1 | FileCheck %s + +vsetvli zero, a0, e8, m1, tu, mu +# LLVM-MCA-RISCV-V MF9 +vadd.vv v12, v12, v12 + +# CHECK: error: Unknown instrumentation type in LLVM-MCA comment: RISCV-V +# CHECK: # LLVM-MCA-RISCV-V MF9 +# CHECK: ^ +# CHECK: error: There was an error parsing comments. diff --git a/llvm/test/tools/llvm-profdata/version.test b/llvm/test/tools/llvm-profdata/version.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/version.test @@ -0,0 +1,4 @@ +# RUN: llvm-profdata --version | FileCheck %s + +# CHECK: llvm-profdata +# CHECK: LLVM version {{.*}} diff --git a/llvm/tools/llvm-mca/CodeRegion.h b/llvm/tools/llvm-mca/CodeRegion.h --- a/llvm/tools/llvm-mca/CodeRegion.h +++ b/llvm/tools/llvm-mca/CodeRegion.h @@ -91,6 +91,8 @@ CodeRegion(llvm::StringRef Desc, llvm::SMLoc Start) : Description(Desc), RangeStart(Start) {} + virtual ~CodeRegion() = default; + void addInstruction(const llvm::MCInst &Instruction) { Instructions.emplace_back(Instruction); } @@ -115,14 +117,14 @@ /// in analysis of the region. 
class InstrumentRegion : public CodeRegion { /// Instrument for this region. - SharedInstrument Instrument; + UniqueInstrument I; public: - InstrumentRegion(llvm::StringRef Desc, llvm::SMLoc Start, SharedInstrument I) - : CodeRegion(Desc, Start), Instrument(I) {} + InstrumentRegion(llvm::StringRef Desc, llvm::SMLoc Start, UniqueInstrument I) + : CodeRegion(Desc, Start), I(std::move(I)) {} public: - SharedInstrument getInstrument() const { return Instrument; } + Instrument *getInstrument() const { return I.get(); } }; class CodeRegionParseError final : public Error {}; @@ -142,6 +144,7 @@ public: CodeRegions(llvm::SourceMgr &S) : SM(S), FoundErrors(false) {} + virtual ~CodeRegions() = default; typedef std::vector::iterator iterator; typedef std::vector::const_iterator const_iterator; @@ -179,14 +182,14 @@ }; struct InstrumentRegions : public CodeRegions { + InstrumentRegions(llvm::SourceMgr &S); void beginRegion(llvm::StringRef Description, llvm::SMLoc Loc, - SharedInstrument Instrument); + UniqueInstrument Instrument); void endRegion(llvm::StringRef Description, llvm::SMLoc Loc); - const SmallVector - getActiveInstruments(llvm::SMLoc Loc) const; + const SmallVector getActiveInstruments(llvm::SMLoc Loc) const; }; } // namespace mca diff --git a/llvm/tools/llvm-mca/CodeRegion.cpp b/llvm/tools/llvm-mca/CodeRegion.cpp --- a/llvm/tools/llvm-mca/CodeRegion.cpp +++ b/llvm/tools/llvm-mca/CodeRegion.cpp @@ -115,7 +115,7 @@ InstrumentRegions::InstrumentRegions(llvm::SourceMgr &S) : CodeRegions(S) {} void InstrumentRegions::beginRegion(StringRef Description, SMLoc Loc, - SharedInstrument I) { + UniqueInstrument I) { if (Description.empty()) { SM.PrintMessage(Loc, llvm::SourceMgr::DK_Error, "anonymous instrumentation regions are not permitted"); @@ -137,7 +137,8 @@ } ActiveRegions[Description] = Regions.size(); - Regions.emplace_back(std::make_unique(Description, Loc, I)); + Regions.emplace_back( + std::make_unique(Description, Loc, std::move(I))); } void 
InstrumentRegions::endRegion(StringRef Description, SMLoc Loc) { @@ -158,13 +159,13 @@ } } -const SmallVector +const SmallVector InstrumentRegions::getActiveInstruments(SMLoc Loc) const { - SmallVector AI; + SmallVector AI; for (auto &R : Regions) { if (R->isLocInRange(Loc)) { InstrumentRegion *IR = static_cast(R.get()); - AI.emplace_back(IR->getInstrument()); + AI.push_back(IR->getInstrument()); } } return AI; diff --git a/llvm/tools/llvm-mca/CodeRegionGenerator.cpp b/llvm/tools/llvm-mca/CodeRegionGenerator.cpp --- a/llvm/tools/llvm-mca/CodeRegionGenerator.cpp +++ b/llvm/tools/llvm-mca/CodeRegionGenerator.cpp @@ -184,7 +184,7 @@ return; } - SharedInstrument I = IM.createInstrument(InstrumentKind, Data); + UniqueInstrument I = IM.createInstrument(InstrumentKind, Data); if (!I) { if (Data.empty()) SM.PrintMessage(Loc, llvm::SourceMgr::DK_Error, @@ -202,7 +202,7 @@ if (Regions.isRegionActive(InstrumentKind)) Regions.endRegion(InstrumentKind, Loc); // Start new instrumentation region - Regions.beginRegion(InstrumentKind, Loc, I); + Regions.beginRegion(InstrumentKind, Loc, std::move(I)); } } // namespace mca diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp --- a/llvm/tools/llvm-mca/llvm-mca.cpp +++ b/llvm/tools/llvm-mca/llvm-mca.cpp @@ -574,7 +574,7 @@ SmallVector> LoweredSequence; for (const MCInst &MCI : Insts) { SMLoc Loc = MCI.getLoc(); - const SmallVector Instruments = + const SmallVector Instruments = InstrumentRegions.getActiveInstruments(Loc); Expected> Inst = diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -3070,6 +3070,12 @@ << "Available commands: merge, show, overlap\n"; return 0; } + + if (strcmp(argv[1], "--version") == 0) { + outs() << ProgName << '\n'; + cl::PrintVersionMessage(); + return 0; + } } if (argc < 2) diff --git a/llvm/unittests/tools/llvm-mca/MCATestBase.cpp 
b/llvm/unittests/tools/llvm-mca/MCATestBase.cpp --- a/llvm/unittests/tools/llvm-mca/MCATestBase.cpp +++ b/llvm/unittests/tools/llvm-mca/MCATestBase.cpp @@ -68,7 +68,7 @@ auto IM = std::make_unique(*STI, *MCII); mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM); - const SmallVector Instruments; + const SmallVector Instruments; SmallVector> LoweredInsts; for (const auto &MCI : Insts) { Expected> Inst = diff --git a/llvm/unittests/tools/llvm-mca/X86/TestIncrementalMCA.cpp b/llvm/unittests/tools/llvm-mca/X86/TestIncrementalMCA.cpp --- a/llvm/unittests/tools/llvm-mca/X86/TestIncrementalMCA.cpp +++ b/llvm/unittests/tools/llvm-mca/X86/TestIncrementalMCA.cpp @@ -35,7 +35,7 @@ auto IM = std::make_unique(*STI, *MCII); mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM); - const SmallVector Instruments; + const SmallVector Instruments; // Tile size = 7 for (unsigned i = 0U, E = MCIs.size(); i < E;) { for (unsigned TE = i + 7; i < TE && i < E; ++i) { @@ -127,7 +127,7 @@ mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM); IB.setInstRecycleCallback(GetRecycledInst); - const SmallVector Instruments; + const SmallVector Instruments; // Tile size = 7 for (unsigned i = 0U, E = MCIs.size(); i < E;) { for (unsigned TE = i + 7; i < TE && i < E; ++i) { diff --git a/mlir/docs/BytecodeFormat.md b/mlir/docs/BytecodeFormat.md --- a/mlir/docs/BytecodeFormat.md +++ b/mlir/docs/BytecodeFormat.md @@ -339,11 +339,20 @@ numSuccessors: varint?, successors: varint[], + numUseListOrders: varint?, + useListOrders: uselist[], + regionEncoding: varint?, // (numRegions << 1) | (isIsolatedFromAbove) // regions are stored in a section if isIsolatedFromAbove regions: (region | region_section)[] } + +uselist { + indexInRange: varint?, + useListEncoding: varint, // (numIndices << 1) | (isIndexPairEncoding) + indices: varint[] +} ``` The encoding of an operation is important because this is generally the most @@ -377,6 +386,26 @@ If the operation has successors, the number of successors and 
the indexes of the successor blocks within the parent region are encoded. +##### Use-list orders + +The reference use-list order is assumed to be the reverse of the global +enumeration of all the op operands that one would obtain with a pre-order walk +of the IR. This order is naturally obtained by building blocks of operations +op-by-op. However, some transformations may shuffle the use-lists with respect +to this reference ordering. If any of the results of the operation have a +use-list order that is not sorted with respect to the reference use-list order, +an encoding is emitted such that it is possible to reconstruct such order after +parsing the bytecode. The encoding represents an index map from the reference +operand order to the current use-list order. A bit flag is used to detect if +this encoding is of type index-pair or not. When the bit flag is set to zero, +the element at `i` represents the position of the use `i` of the reference list +into the current use-list. When the bit flag is set to `1`, the encoding +represents index pairs `(i, j)`, which indicate that the use at position `i` of +the reference list is mapped to position `j` in the current use-list. When +less than half of the elements in the current use-list are shuffled with respect +to the reference use-list, the index-pair encoding is used to reduce the +bytecode memory requirements. + +##### Regions + If the operation has regions, the number of regions and if the regions are @@ -410,6 +439,8 @@ block_arguments { numArgs: varint?, args: block_argument[] + numUseListOrders: varint?, + useListOrders: uselist[], } block_argument { @@ -421,3 +452,6 @@ A block is encoded with an array of operations and block arguments. The first field is an encoding that combines the number of operations in the block, with a flag indicating if the block has arguments. + +Use-list orders are attached to block arguments similarly to how they are +attached to operation results. 
diff --git a/mlir/lib/Bytecode/Encoding.h b/mlir/include/mlir/Bytecode/Encoding.h rename from mlir/lib/Bytecode/Encoding.h rename to mlir/include/mlir/Bytecode/Encoding.h --- a/mlir/lib/Bytecode/Encoding.h +++ b/mlir/include/mlir/Bytecode/Encoding.h @@ -11,10 +11,12 @@ // //===----------------------------------------------------------------------===// -#ifndef LIB_MLIR_BYTECODE_ENCODING_H -#define LIB_MLIR_BYTECODE_ENCODING_H +#ifndef MLIR_BYTECODE_ENCODING_H +#define MLIR_BYTECODE_ENCODING_H +#include "mlir/IR/Value.h" #include +#include namespace mlir { namespace bytecode { @@ -27,7 +29,7 @@ kMinSupportedVersion = 0, /// The current bytecode version. - kVersion = 2, + kVersion = 3, /// An arbitrary value used to fill alignment padding. kAlignmentByte = 0xCB, @@ -87,10 +89,27 @@ kHasOperands = 0b00000100, kHasSuccessors = 0b00001000, kHasInlineRegions = 0b00010000, + kHasUseListOrders = 0b00100000, // clang-format on }; } // namespace OpEncodingMask +/// Get the unique ID of a value use. We encode the unique ID combining an owner +/// number and the argument number such as if ownerID(op1) < ownerID(op2), then +/// useID(op1) < useID(op2). If uses have the same owner, then argNumber(op1) < +/// argNumber(op2) implies useID(op1) < useID(op2). +template +static inline uint64_t getUseID(OperandT &val, unsigned ownerID) { + uint32_t operandNumberID; + if constexpr (std::is_same_v) + operandNumberID = val.getOperandNumber(); + else if constexpr (std::is_same_v) + operandNumberID = val.getArgNumber(); + else + llvm_unreachable("unexpected operand type"); + return (static_cast(ownerID) << 32) | operandNumberID; +} + } // namespace bytecode } // namespace mlir diff --git a/mlir/include/mlir/Conversion/LLVMCommon/Pattern.h b/mlir/include/mlir/Conversion/LLVMCommon/Pattern.h --- a/mlir/include/mlir/Conversion/LLVMCommon/Pattern.h +++ b/mlir/include/mlir/Conversion/LLVMCommon/Pattern.h @@ -82,12 +82,12 @@ /// Returns the type of a pointer to an element of the memref. 
Type getElementPtrType(MemRefType type) const; - /// Computes sizes, strides and buffer size in bytes of `memRefType` with - /// identity layout. Emits constant ops for the static sizes of `memRefType`, - /// and uses `dynamicSizes` for the others. Emits instructions to compute - /// strides and buffer size from these sizes. + /// Computes sizes, strides and buffer size of `memRefType` with identity + /// layout. Emits constant ops for the static sizes of `memRefType`, and uses + /// `dynamicSizes` for the others. Emits instructions to compute strides and + /// buffer size from these sizes. /// - /// For example, memref<4x?xf32> emits: + /// For example, memref<4x?xf32> with `sizeInBytes = true` emits: /// `sizes[0]` = llvm.mlir.constant(4 : index) : i64 /// `sizes[1]` = `dynamicSizes[0]` /// `strides[1]` = llvm.mlir.constant(1 : index) : i64 @@ -97,19 +97,27 @@ /// %gep = llvm.getelementptr %nullptr[%size] /// : (!llvm.ptr, i64) -> !llvm.ptr /// `sizeBytes` = llvm.ptrtoint %gep : !llvm.ptr to i64 + /// + /// If `sizeInBytes = false`, memref<4x?xf32> emits: + /// `sizes[0]` = llvm.mlir.constant(4 : index) : i64 + /// `sizes[1]` = `dynamicSizes[0]` + /// `strides[1]` = llvm.mlir.constant(1 : index) : i64 + /// `strides[0]` = `sizes[0]` + /// %size = llvm.mul `sizes[0]`, `sizes[1]` : i64 void getMemRefDescriptorSizes(Location loc, MemRefType memRefType, ValueRange dynamicSizes, ConversionPatternRewriter &rewriter, SmallVectorImpl &sizes, - SmallVectorImpl &strides, - Value &sizeBytes) const; + SmallVectorImpl &strides, Value &size, + bool sizeInBytes = true) const; /// Computes the size of type in bytes. Value getSizeInBytes(Location loc, Type type, ConversionPatternRewriter &rewriter) const; - /// Computes total number of elements for the given shape. - Value getNumElements(Location loc, ArrayRef shape, + /// Computes total number of elements for the given MemRef and dynamicSizes. 
+ Value getNumElements(Location loc, MemRefType memRefType, + ValueRange dynamicSizes, ConversionPatternRewriter &rewriter) const; /// Creates and populates a canonical memref descriptor struct. diff --git a/mlir/include/mlir/Conversion/MemRefToLLVM/AllocLikeConversion.h b/mlir/include/mlir/Conversion/MemRefToLLVM/AllocLikeConversion.h --- a/mlir/include/mlir/Conversion/MemRefToLLVM/AllocLikeConversion.h +++ b/mlir/include/mlir/Conversion/MemRefToLLVM/AllocLikeConversion.h @@ -20,8 +20,10 @@ using ConvertToLLVMPattern::getVoidPtrType; explicit AllocationOpLLVMLowering(StringRef opName, - LLVMTypeConverter &converter) - : ConvertToLLVMPattern(opName, &converter.getContext(), converter) {} + LLVMTypeConverter &converter, + PatternBenefit benefit = 1) + : ConvertToLLVMPattern(opName, &converter.getContext(), converter, + benefit) {} protected: /// Computes the aligned value for 'input' as follows: @@ -103,15 +105,20 @@ /// Lowering for AllocOp and AllocaOp. struct AllocLikeOpLLVMLowering : public AllocationOpLLVMLowering { explicit AllocLikeOpLLVMLowering(StringRef opName, - LLVMTypeConverter &converter) - : AllocationOpLLVMLowering(opName, converter) {} + LLVMTypeConverter &converter, + PatternBenefit benefit = 1) + : AllocationOpLLVMLowering(opName, converter, benefit) {} protected: /// Allocates the underlying buffer. Returns the allocated pointer and the /// aligned pointer. virtual std::tuple - allocateBuffer(ConversionPatternRewriter &rewriter, Location loc, - Value sizeBytes, Operation *op) const = 0; + allocateBuffer(ConversionPatternRewriter &rewriter, Location loc, Value size, + Operation *op) const = 0; + + /// Sets the flag 'requiresNumElements', specifying the Op requires the number + /// of elements instead of the size in bytes. 
+ void setRequiresNumElements(); private: // An `alloc` is converted into a definition of a memref descriptor value and @@ -133,6 +140,10 @@ LogicalResult matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const override; + + // Flag for specifying the Op requires the number of elements instead of the + // size in bytes. + bool requiresNumElements = false; }; } // namespace mlir diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -172,8 +172,9 @@ LLVM_ScalarOrVectorOf, "fneg", "FNeg">; // Memory-related operations. -def LLVM_AllocaOp : LLVM_Op<"alloca", - [DeclareOpInterfaceMethods]>, +def LLVM_AllocaOp : LLVM_Op<"alloca", + [DeclareOpInterfaceMethods, + DeclareOpInterfaceMethods]>, LLVM_MemOpPatterns { let arguments = (ins AnyInteger:$arraySize, OptionalAttr:$alignment, @@ -232,7 +233,9 @@ } def LLVM_GEPOp : LLVM_Op<"getelementptr", [Pure, - DeclareOpInterfaceMethods]> { + DeclareOpInterfaceMethods, + DeclareOpInterfaceMethods, + DeclareOpInterfaceMethods]> { let arguments = (ins LLVM_ScalarOrVectorOf:$base, Variadic>:$dynamicIndices, DenseI32ArrayAttr:$rawConstantIndices, @@ -316,7 +319,8 @@ } def LLVM_LoadOp : LLVM_MemAccessOpBase<"load", - [DeclareOpInterfaceMethods]> { + [DeclareOpInterfaceMethods, + DeclareOpInterfaceMethods]> { dag args = (ins Arg, "", [MemRead]>:$addr, OptionalAttr:$alignment, UnitAttr:$volatile_, @@ -388,7 +392,8 @@ } def LLVM_StoreOp : LLVM_MemAccessOpBase<"store", - [DeclareOpInterfaceMethods]> { + [DeclareOpInterfaceMethods, + DeclareOpInterfaceMethods]> { dag args = (ins LLVM_LoadableType:$value, Arg,"",[MemWrite]>:$addr, OptionalAttr:$alignment, diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h b/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h 
@@ -16,6 +16,7 @@ #include "mlir/IR/Types.h" #include "mlir/Interfaces/DataLayoutInterfaces.h" +#include "mlir/Interfaces/MemorySlotInterfaces.h" #include namespace llvm { @@ -103,6 +104,7 @@ class LLVMStructType : public Type::TypeBase { public: /// Inherit base constructors. @@ -198,6 +200,12 @@ LogicalResult verifyEntries(DataLayoutEntryListRef entries, Location loc) const; + + /// Destructs the struct into its indexed field types. + std::optional> getSubelementIndexMap(); + + /// Returns which type is stored at a given integer index within the struct. + Type getTypeAtIndex(Attribute index); }; //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.td --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.td @@ -12,6 +12,7 @@ include "mlir/Dialect/LLVMIR/LLVMOpBase.td" include "mlir/IR/AttrTypeBase.td" include "mlir/Interfaces/DataLayoutInterfaces.td" +include "mlir/Interfaces/MemorySlotInterfaces.td" /// Base class for all LLVM dialect types. class LLVMType traits = []> @@ -24,7 +25,8 @@ //===----------------------------------------------------------------------===// def LLVMArrayType : LLVMType<"LLVMArray", "array", [ - DeclareTypeInterfaceMethods]> { + DeclareTypeInterfaceMethods, + DeclareTypeInterfaceMethods]> { let summary = "LLVM array type"; let description = [{ The `!llvm.array` type represents a fixed-size array of element types. diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td --- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td +++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td @@ -819,7 +819,7 @@ loop nests, which can be empty. }]; - // Also allow any !pdl.operation for simpler composition. 
Non-tensor.pad ops + // Also allow any payload operation for simpler composition. Non-tensor.pad ops // will be dropped from the results. let arguments = (ins TransformHandleTypeInterface:$target, @@ -862,7 +862,7 @@ tensor.pad operations, which can be empty. }]; - // Also allow any !pdl.operation for simpler composition. Non-tensor.pad ops + // Also allow any operation for simpler composition. Non-tensor.pad ops // will be dropped from the results. let arguments = (ins TransformHandleTypeInterface:$target, diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td @@ -636,7 +636,8 @@ UnitAttr:$selfAttr, OptionalAttr:$reductionOp, Variadic:$reductionOperands, - Variadic:$gangPrivateOperands, + Variadic:$gangPrivateOperands, + OptionalAttr:$privatizations, Variadic:$gangFirstPrivateOperands, Variadic:$dataClauseOperands, OptionalAttr:$defaultAttr); @@ -659,7 +660,9 @@ type($gangFirstPrivateOperands) `)` | `num_gangs` `(` $numGangs `:` type($numGangs) `)` | `num_workers` `(` $numWorkers `:` type($numWorkers) `)` - | `private` `(` $gangPrivateOperands `:` type($gangPrivateOperands) `)` + | `private` `(` custom( + $gangPrivateOperands, type($gangPrivateOperands), $privatizations) + `)` | `vector_length` `(` $vectorLength `:` type($vectorLength) `)` | `wait` `(` $waitOperands `:` type($waitOperands) `)` | `self` `(` $selfCond `)` @@ -701,7 +704,8 @@ UnitAttr:$selfAttr, OptionalAttr:$reductionOp, Variadic:$reductionOperands, - Variadic:$gangPrivateOperands, + Variadic:$gangPrivateOperands, + OptionalAttr:$privatizations, Variadic:$gangFirstPrivateOperands, Variadic:$dataClauseOperands, OptionalAttr:$defaultAttr); @@ -722,7 +726,9 @@ | `async` `(` $async `:` type($async) `)` | `firstprivate` `(` $gangFirstPrivateOperands `:` type($gangFirstPrivateOperands) `)` - | `private` `(` $gangPrivateOperands `:` 
type($gangPrivateOperands) `)` + | `private` `(` custom( + $gangPrivateOperands, type($gangPrivateOperands), $privatizations) + `)` | `wait` `(` $waitOperands `:` type($waitOperands) `)` | `self` `(` $selfCond `)` | `if` `(` $ifCond `)` @@ -1033,7 +1039,8 @@ UnitAttr:$hasWorker, UnitAttr:$hasVector, Variadic:$tileOperands, - Variadic:$privateOperands, + Variadic:$privateOperands, + OptionalAttr:$privatizations, OptionalAttr:$reductionOp, Variadic:$reductionOperands); @@ -1053,7 +1060,9 @@ `gang` `` custom($gangNum, type($gangNum), $gangStatic, type($gangStatic), $hasGang) | `worker` `` custom($workerNum, type($workerNum), $hasWorker) | `vector` `` custom($vectorLength, type($vectorLength), $hasVector) - | `private` `(` $privateOperands `:` type($privateOperands) `)` + | `private` `(` custom( + $privateOperands, type($privateOperands), $privatizations) + `)` | `tile` `(` $tileOperands `:` type($tileOperands) `)` | `reduction` `(` $reductionOperands `:` type($reductionOperands) `)` ) diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPInterfaces.h b/mlir/include/mlir/Dialect/OpenMP/OpenMPInterfaces.h --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPInterfaces.h +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPInterfaces.h @@ -30,6 +30,12 @@ struct OffloadModuleDefaultModel : public OffloadModuleInterface::ExternalModel {}; + +template +struct DeclareTargetDefaultModel + : public DeclareTargetInterface::ExternalModel, + T> {}; + } // namespace mlir::omp #endif // MLIR_DIALECT_OPENMP_OPENMPINTERFACES_H_ diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -26,7 +26,7 @@ def OpenMP_Dialect : Dialect { let name = "omp"; let cppNamespace = "::mlir::omp"; - let dependentDialects = ["::mlir::LLVM::LLVMDialect"]; + let dependentDialects = ["::mlir::LLVM::LLVMDialect, ::mlir::func::FuncDialect"]; let 
useDefaultAttributePrinterParser = 1; let usePropertiesForAttributes = 1; } @@ -97,6 +97,52 @@ def OpenMP_PointerLikeType : TypeAlias; +//===----------------------------------------------------------------------===// +// 2.12.7 Declare Target Directive +//===----------------------------------------------------------------------===// + +def DeviceTypeAny : I32EnumAttrCase<"any", 0>; +def DeviceTypeHost : I32EnumAttrCase<"host", 1>; +def DeviceTypeNoHost : I32EnumAttrCase<"nohost", 2>; + +def DeclareTargetDeviceType : I32EnumAttr< + "DeclareTargetDeviceType", + "device_type clause", + [DeviceTypeAny, DeviceTypeHost, DeviceTypeNoHost]> { + let genSpecializedAttr = 0; + let cppNamespace = "::mlir::omp"; +} + +def DeclareTargetDeviceTypeAttr : EnumAttr { + let assemblyFormat = "`(` $value `)`"; +} + +def CaptureClauseTo : I32EnumAttrCase<"to", 0>; +def CaptureClauseLink : I32EnumAttrCase<"link", 1>; + +def DeclareTargetCaptureClause : I32EnumAttr< + "DeclareTargetCaptureClause", + "capture clause", + [CaptureClauseTo, CaptureClauseLink]> { + let genSpecializedAttr = 0; + let cppNamespace = "::mlir::omp"; +} + +def DeclareTargetCaptureClauseAttr : EnumAttr { + let assemblyFormat = "`(` $value `)`"; +} + +def DeclareTargetAttr : OpenMP_Attr<"DeclareTarget", "declaretarget"> { + let parameters = (ins + OptionalParameter<"DeclareTargetDeviceTypeAttr">:$device_type, + OptionalParameter<"DeclareTargetCaptureClauseAttr">:$capture_clause + ); + + let assemblyFormat = "`<` struct(params) `>`"; +} + +//===----------------------------------------------------------------------===// +// 2.6 parallel Construct +//===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td @@ -47,6 +47,74 @@ ]; } +def
DeclareTargetInterface : OpInterface<"DeclareTargetInterface"> { + let description = [{ + OpenMP operations that support declare target have this interface. + For example, FuncOp's and llvm.GlobalOp/fir.GlobalOp's. This + interface allows simple manipulation and introspection of the + declare target attribute that can be applied to these operations. + }]; + + let cppNamespace = "::mlir::omp"; + + let methods = [ + InterfaceMethod< + /*description=*/[{ + Set the declare target attribute on the current operation with the + specified attribute arguments. + }], + /*retTy=*/"void", + /*methodName=*/"setDeclareTarget", + (ins "mlir::omp::DeclareTargetDeviceType":$deviceType, + "mlir::omp::DeclareTargetCaptureClause":$captureClause), [{}], [{ + $_op->setAttr("omp.declare_target", + mlir::omp::DeclareTargetAttr::get( + $_op->getContext(), + mlir::omp::DeclareTargetDeviceTypeAttr::get( + $_op->getContext(), deviceType), + mlir::omp::DeclareTargetCaptureClauseAttr::get( + $_op->getContext(), captureClause))); + }]>, + InterfaceMethod< + /*description=*/[{ + Checks if the declare target attribute has been applied and exists on the + current operation. Returns true if it exists on it, otherwise returns + false. + }], + /*retTy=*/"bool", + /*methodName=*/"isDeclareTarget", + (ins), [{}], [{ + return $_op->hasAttr("omp.declare_target"); + }]>, + InterfaceMethod< + /*description=*/[{ + Returns the DeclareTargetDeviceType segment of the DeclareTarget attribute if it + exists on the current operation. Otherwise it returns null. + }], + /*retTy=*/"mlir::omp::DeclareTargetDeviceType", + /*methodName=*/"getDeclareTargetDeviceType", + (ins), [{}], [{ + if (mlir::Attribute dTar = $_op->getAttr("omp.declare_target")) + if (auto dAttr = dTar.dyn_cast_or_null()) + return dAttr.getDeviceType().getValue(); + return {}; + }]>, + InterfaceMethod< + /*description=*/[{ + Returns the DeclareTargetCaptureClause segment of the DeclareTarget attribute if it + exists on the current operation. 
Otherwise it returns null. + }], + /*retTy=*/"mlir::omp::DeclareTargetCaptureClause", + /*methodName=*/"getDeclareTargetCaptureClause", + (ins), [{}], [{ + if (mlir::Attribute dTar = $_op->getAttr("omp.declare_target")) + if (auto dAttr = dTar.dyn_cast_or_null()) + return dAttr.getCaptureClause().getValue(); + return {}; + }]> + ]; +} + def OffloadModuleInterface : OpInterface<"OffloadModuleInterface"> { let description = [{ Operations that represent a module for offloading (host or device) diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td --- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td @@ -93,7 +93,6 @@ // of 3x4 matrix |0.0, 0.0, 2.2, 3.3| // |0.0, 0.0, 0.0, 0.0| ``` - ``` }]; let assemblyFormat = diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td --- a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td +++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td @@ -944,6 +944,8 @@ let results = (outs Tosa_Tensor:$output ); + + let hasFolder = 1; } //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td b/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td --- a/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td +++ b/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td @@ -77,16 +77,16 @@ ```mlir %result = transform.alternatives %scope { - ^bb0(%arg0: !pdl.operation): + ^bb0(%arg0: !transform.any_op): // Try a fallible transformation. %0 = transform.fallible %arg0 // ... // If succeeded, yield the the result of the transformation. 
- transform.yield %0 : !pdl.operation + transform.yield %0 : !transform.any_op }, { - ^bb0(%arg0: !pdl.operation): + ^bb0(%arg0: !transform.any_op): // Otherwise, the second alternative is tried and it always succeeds by // returning the original handle. - transform.yield %arg0 : !pdl.operation + transform.yield %arg0 : !transform.any_op } ``` }]; @@ -767,7 +767,7 @@ ```mlir transform.with_pdl_patterns { - ^bb0(%arg0: !pdl.operation): + ^bb0(%arg0: !transform.any_op): pdl.pattern @my_pattern : benefit(1) { %0 = pdl.operation //... // Regular PDL goes here. @@ -775,7 +775,7 @@ } sequence %arg0 failures(propagate) { - ^bb0(%arg1: !pdl.operation): + ^bb0(%arg1: !transform.any_op): %1 = pdl_match @my_pattern in %arg1 // Use %1 as handle } diff --git a/mlir/include/mlir/IR/UseDefLists.h b/mlir/include/mlir/IR/UseDefLists.h --- a/mlir/include/mlir/IR/UseDefLists.h +++ b/mlir/include/mlir/IR/UseDefLists.h @@ -44,6 +44,21 @@ /// of the SSA machinery. IROperandBase *getNextOperandUsingThisValue() { return nextUse; } + /// Initialize the use-def chain by setting the back address to self and + /// nextUse to nullptr. + void initChainWithUse(IROperandBase **self) { + assert(this == *self); + back = self; + nextUse = nullptr; + } + + /// Link the current node to next. + void linkTo(IROperandBase *next) { + nextUse = next; + if (nextUse) + nextUse->back = &nextUse; + } + protected: IROperandBase(Operation *owner) : owner(owner) {} IROperandBase(IROperandBase &&other) : owner(other.owner) { @@ -192,6 +207,30 @@ use_begin()->set(newValue); } + /// Shuffle the use-list chain according to the provided indices vector, which + /// need to represent a valid shuffle. That is, a vector of unique integers in + /// range [0, numUses - 1]. Users of this function need to guarantee the + /// validity of the indices vector. 
+ void shuffleUseList(ArrayRef indices) { + assert((size_t)std::distance(getUses().begin(), getUses().end()) == + indices.size() && + "indices vector expected to have a number of elements equal to the " + "number of uses"); + SmallVector shuffled(indices.size()); + detail::IROperandBase *ptr = firstUse; + for (size_t idx = 0; idx < indices.size(); + idx++, ptr = ptr->getNextOperandUsingThisValue()) + shuffled[indices[idx]] = ptr; + + initFirstUse(shuffled.front()); + auto *current = firstUse; + for (auto &next : llvm::drop_begin(shuffled)) { + current->linkTo(next); + current = next; + } + current->linkTo(nullptr); + } + //===--------------------------------------------------------------------===// // Uses //===--------------------------------------------------------------------===// @@ -234,6 +273,12 @@ OperandType *getFirstUse() const { return (OperandType *)firstUse; } private: + /// Set use as the first use of the chain. + void initFirstUse(detail::IROperandBase *use) { + firstUse = use; + firstUse->initChainWithUse(&firstUse); + } + detail::IROperandBase *firstUse = nullptr; /// Allow access to `firstUse`. diff --git a/mlir/include/mlir/IR/Value.h b/mlir/include/mlir/IR/Value.h --- a/mlir/include/mlir/IR/Value.h +++ b/mlir/include/mlir/IR/Value.h @@ -187,6 +187,11 @@ /// Returns true if the value is used outside of the given block. bool isUsedOutsideOfBlock(Block *block); + /// Shuffle the use list order according to the provided indices. It is + /// responsibility of the caller to make sure that the indices map the current + /// use-list chain to another valid use-list chain. 
+ void shuffleUseList(ArrayRef indices); + //===--------------------------------------------------------------------===// // Uses diff --git a/mlir/include/mlir/Interfaces/CMakeLists.txt b/mlir/include/mlir/Interfaces/CMakeLists.txt --- a/mlir/include/mlir/Interfaces/CMakeLists.txt +++ b/mlir/include/mlir/Interfaces/CMakeLists.txt @@ -19,6 +19,8 @@ set(LLVM_TARGET_DEFINITIONS MemorySlotInterfaces.td) mlir_tablegen(MemorySlotOpInterfaces.h.inc -gen-op-interface-decls) mlir_tablegen(MemorySlotOpInterfaces.cpp.inc -gen-op-interface-defs) +mlir_tablegen(MemorySlotTypeInterfaces.h.inc -gen-type-interface-decls) +mlir_tablegen(MemorySlotTypeInterfaces.cpp.inc -gen-type-interface-defs) add_public_tablegen_target(MLIRMemorySlotInterfacesIncGen) add_dependencies(mlir-generic-headers MLIRMemorySlotInterfacesIncGen) diff --git a/mlir/include/mlir/Interfaces/MemorySlotInterfaces.h b/mlir/include/mlir/Interfaces/MemorySlotInterfaces.h --- a/mlir/include/mlir/Interfaces/MemorySlotInterfaces.h +++ b/mlir/include/mlir/Interfaces/MemorySlotInterfaces.h @@ -24,6 +24,13 @@ Type elemType; }; +/// Memory slot attached with information about its destructuring procedure. +struct DestructurableMemorySlot : public MemorySlot { + /// Maps an index within the memory slot to the type of the pointer that + /// will be generated to access the element directly. + DenseMap elementPtrs; +}; + /// Returned by operation promotion logic requesting the deletion of an /// operation. 
enum class DeletionKind { @@ -36,5 +43,6 @@ } // namespace mlir #include "mlir/Interfaces/MemorySlotOpInterfaces.h.inc" +#include "mlir/Interfaces/MemorySlotTypeInterfaces.h.inc" #endif // MLIR_INTERFACES_MEMORYSLOTINTERFACES_H diff --git a/mlir/include/mlir/Interfaces/MemorySlotInterfaces.td b/mlir/include/mlir/Interfaces/MemorySlotInterfaces.td --- a/mlir/include/mlir/Interfaces/MemorySlotInterfaces.td +++ b/mlir/include/mlir/Interfaces/MemorySlotInterfaces.td @@ -215,4 +215,158 @@ ]; } +def DestructurableAllocationOpInterface + : OpInterface<"DestructurableAllocationOpInterface"> { + let description = [{ + Describes operations allocating memory slots of aggregates that can be + destructured into multiple smaller allocations. + }]; + let cppNamespace = "::mlir"; + + let methods = [ + InterfaceMethod<[{ + Returns the list of slots for which destructuring should be attempted, + specifying in which way the slot should be destructured into subslots. + The subslots are indexed by attributes. This computes the type of the + pointer for each subslot to be generated. The type of the memory slot + must implement `DestructurableTypeInterface`. + + No IR mutation is allowed in this method. + }], + "::llvm::SmallVector<::mlir::DestructurableMemorySlot>", + "getDestructurableSlots", + (ins) + >, + InterfaceMethod<[{ + Destructures this slot into multiple subslots. The newly generated slots + may belong to a different allocator. The original slot must still exist + at the end of this call. Only generates subslots for the indices found in + `usedIndices` since all other subslots are unused. + + The rewriter is located at the beginning of the block where the slot + pointer is defined. All IR mutations must happen through the rewriter. 
+ }], + "::llvm::DenseMap<::mlir::Attribute, ::mlir::MemorySlot>", + "destructure", + (ins "const ::mlir::DestructurableMemorySlot &":$slot, + "const ::llvm::SmallPtrSetImpl<::mlir::Attribute> &":$usedIndices, + "::mlir::RewriterBase &":$rewriter) + >, + InterfaceMethod<[{ + Hook triggered once the destructuring of a slot is complete, meaning the + original slot is no longer being referred to and could be deleted. + This will only be called for slots declared by this operation. + + All IR mutations must happen through the rewriter. + }], + "void", "handleDestructuringComplete", + (ins "const ::mlir::DestructurableMemorySlot &":$slot, + "::mlir::RewriterBase &":$rewriter) + >, + ]; +} + +def SafeMemorySlotAccessOpInterface + : OpInterface<"SafeMemorySlotAccessOpInterface"> { + let description = [{ + Describes operations using memory slots in a type-safe manner. + }]; + let cppNamespace = "::mlir"; + + let methods = [ + InterfaceMethod<[{ + Returns whether all accesses in this operation to the provided slot are + done in a type-safe manner. To be type-safe, the access must only load + the value in this type as the type of the slot, and without assuming any + context around the slot. For example, a type-safe load must not load + outside the bounds of the slot. + + If the type-safety of the accesses depends on the type-safety of the + accesses to further memory slots, the result of this method will be + conditioned to the type-safety of the accesses to the slots added by + this method to `mustBeSafelyUsed`. + + No IR mutation is allowed in this method. + }], + "::mlir::LogicalResult", + "ensureOnlySafeAccesses", + (ins "const ::mlir::MemorySlot &":$slot, + "::mlir::SmallVectorImpl<::mlir::MemorySlot> &":$mustBeSafelyUsed) + > + ]; +} + +def DestructurableAccessorOpInterface + : OpInterface<"DestructurableAccessorOpInterface"> { + let description = [{ + Describes operations that can access a sub-element of a destructurable slot.
+ }]; + let cppNamespace = "::mlir"; + + let methods = [ + InterfaceMethod<[{ + For a given destructurable memory slot, returns whether this operation can + rewire its uses of the slot to use the slots generated after + destructuring. This may involve creating new operations, and usually + amounts to checking if the pointer types match. + + This method must also register the indices it will access within the + `usedIndices` set. If the accessor generates new slots mapping to + subelements, they must be registered in `mustBeSafelyUsed` to ensure + they are used in a locally type-safe manner. + + No IR mutation is allowed in this method. + }], + "bool", + "canRewire", + (ins "const ::mlir::DestructurableMemorySlot &":$slot, + "::llvm::SmallPtrSetImpl<::mlir::Attribute> &":$usedIndices, + "::mlir::SmallVectorImpl<::mlir::MemorySlot> &":$mustBeSafelyUsed) + >, + InterfaceMethod<[{ + Rewires the use of a slot to the generated subslots, without deleting + any operation. Returns whether the accessor should be deleted. + + All IR mutations must happen through the rewriter. Deletion of + operations is not allowed, only the accessor can be scheduled for + deletion by returning the appropriate value. + }], + "::mlir::DeletionKind", + "rewire", + (ins "const ::mlir::DestructurableMemorySlot &":$slot, + "::llvm::DenseMap<::mlir::Attribute, ::mlir::MemorySlot> &":$subslots, + "::mlir::RewriterBase &":$rewriter) + > + ]; +} + +def DestructurableTypeInterface + : TypeInterface<"DestructurableTypeInterface"> { + let description = [{ + Describes a type that can be broken down into indexable sub-element types. + }]; + let cppNamespace = "::mlir"; + + let methods = [ + InterfaceMethod<[{ + Destructures the type into subelements into a map of index attributes to + types of subelements. Returns nothing if the type cannot be destructured. 
+ }], + "::std::optional<::llvm::DenseMap<::mlir::Attribute, ::mlir::Type>>", + "getSubelementIndexMap", + (ins) + >, + InterfaceMethod<[{ + Indicates which type is held at the provided index, returning a null + Type if no type could be computed. While this can return information + even when the type cannot be completely destructured, it must be coherent + with the types returned by `getSubelementIndexMap` when they exist. + }], + "::mlir::Type", + "getTypeAtIndex", + (ins "::mlir::Attribute":$index) + > + ]; +} + #endif // MLIR_INTERFACES_MEMORYSLOTINTERFACES diff --git a/mlir/include/mlir/Transforms/Mem2Reg.h b/mlir/include/mlir/Transforms/Mem2Reg.h --- a/mlir/include/mlir/Transforms/Mem2Reg.h +++ b/mlir/include/mlir/Transforms/Mem2Reg.h @@ -17,8 +17,11 @@ namespace mlir { +/// Statistics collected while applying mem2reg. struct Mem2RegStatistics { + /// Total amount of memory slots promoted. llvm::Statistic *promotedAmount = nullptr; + /// Total amount of new block arguments inserted in blocks. 
llvm::Statistic *newBlockArgumentAmount = nullptr; }; diff --git a/mlir/include/mlir/Transforms/Passes.h b/mlir/include/mlir/Transforms/Passes.h --- a/mlir/include/mlir/Transforms/Passes.h +++ b/mlir/include/mlir/Transforms/Passes.h @@ -36,6 +36,7 @@ #define GEN_PASS_DECL_MEM2REG #define GEN_PASS_DECL_PRINTIRPASS #define GEN_PASS_DECL_PRINTOPSTATS +#define GEN_PASS_DECL_SROA #define GEN_PASS_DECL_STRIPDEBUGINFO #define GEN_PASS_DECL_SCCP #define GEN_PASS_DECL_SYMBOLDCE diff --git a/mlir/include/mlir/Transforms/Passes.td b/mlir/include/mlir/Transforms/Passes.td --- a/mlir/include/mlir/Transforms/Passes.td +++ b/mlir/include/mlir/Transforms/Passes.td @@ -199,10 +199,10 @@ let statistics = [ Statistic<"promotedAmount", "promoted slots", - "Number of promoted memory slot">, + "Total amount of memory slot promoted">, Statistic<"newBlockArgumentAmount", "new block args", - "Total number of block arguments added">, + "Total amount of new block argument inserted in blocks">, ]; } @@ -229,6 +229,42 @@ let constructor = "mlir::createSCCPPass()"; } +def SROA : Pass<"sroa"> { + let summary = "Scalar Replacement of Aggregates"; + let description = [{ + Scalar Replacement of Aggregates. Replaces allocations of aggregates into + independent allocations of their elements. + + Allocators must implement `DestructurableAllocationOpInterface` to provide + the list of memory slots for which destructuring should be attempted. + + This pass will only be applied if all accessors of the aggregate implement + the `DestructurableAccessorOpInterface`. If the accessors provide a view + into the struct, users of the view must ensure it is used in a type-safe + manner and within bounds by implementing `SafeMemorySlotAccessOpInterface`.
+ }]; + + let statistics = [ + Statistic< + "destructuredAmount", + "destructured slots", + "Total amount of memory slots destructured" + >, + Statistic< + "slotsWithMemoryBenefit", + "slots with memory benefit", + "Total amount of memory slots in which the destructured size was smaller " + "than the total size after eliminating unused fields" + >, + Statistic< + "maxSubelementAmount", + "max subelement number", + "Maximal number of sub-elements a successfully destructured slot " + "initially had" + >, + ]; +} + def StripDebugInfo : Pass<"strip-debuginfo"> { let summary = "Strip debug info from all operations"; let description = [{ diff --git a/mlir/include/mlir/Transforms/SROA.h b/mlir/include/mlir/Transforms/SROA.h new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/Transforms/SROA.h @@ -0,0 +1,57 @@ +//===-- SROA.h - Scalar Replacement Of Aggregates ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TRANSFORMS_SROA_H +#define MLIR_TRANSFORMS_SROA_H + +#include "mlir/IR/PatternMatch.h" +#include "mlir/Interfaces/MemorySlotInterfaces.h" +#include "mlir/Support/LogicalResult.h" +#include "llvm/ADT/Statistic.h" + +namespace mlir { + +/// Statistics collected while applying SROA. +struct SROAStatistics { + /// Total amount of memory slots destructured. + llvm::Statistic *destructuredAmount = nullptr; + /// Total amount of memory slots in which the destructured size was smaller + /// than the total size after eliminating unused fields. + llvm::Statistic *slotsWithMemoryBenefit = nullptr; + /// Maximal number of sub-elements a successfully destructured slot initially + /// had. 
+ llvm::Statistic *maxSubelementAmount = nullptr; +}; + +/// Pattern applying SROA to the regions of the operations on which it +/// matches. +class SROAPattern + : public OpInterfaceRewritePattern { +public: + using OpInterfaceRewritePattern::OpInterfaceRewritePattern; + + SROAPattern(MLIRContext *context, SROAStatistics statistics = {}, + PatternBenefit benefit = 1) + : OpInterfaceRewritePattern(context, benefit), statistics(statistics) {} + + LogicalResult matchAndRewrite(DestructurableAllocationOpInterface allocator, + PatternRewriter &rewriter) const override; + +private: + SROAStatistics statistics; +}; + +/// Attempts to destructure the slots of destructurable allocators. Returns +/// failure if no slot was destructured. +LogicalResult tryToDestructureMemorySlots( + ArrayRef allocators, + RewriterBase &rewriter, SROAStatistics statistics = {}); + +} // namespace mlir + +#endif // MLIR_TRANSFORMS_SROA_H diff --git a/mlir/lib/Bytecode/Reader/BytecodeReader.cpp b/mlir/lib/Bytecode/Reader/BytecodeReader.cpp --- a/mlir/lib/Bytecode/Reader/BytecodeReader.cpp +++ b/mlir/lib/Bytecode/Reader/BytecodeReader.cpp @@ -7,12 +7,11 @@ //===----------------------------------------------------------------------===// // TODO: Support for big-endian architectures. -// TODO: Properly preserve use lists of values. #include "mlir/Bytecode/BytecodeReader.h" -#include "../Encoding.h" #include "mlir/AsmParser/AsmParser.h" #include "mlir/Bytecode/BytecodeImplementation.h" +#include "mlir/Bytecode/Encoding.h" #include "mlir/IR/BuiltinDialect.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/OpImplementation.h" @@ -29,6 +28,7 @@ #include "llvm/Support/SourceMgr.h" #include #include +#include #include #define DEBUG_TYPE "mlir-bytecode-reader" @@ -1281,6 +1281,42 @@ /// Create a value to use for a forward reference. 
Value createForwardRef(); + //===--------------------------------------------------------------------===// + // Use-list order helpers + + /// This struct is a simple storage that contains information required to + /// reorder the use-list of a value with respect to the pre-order traversal + /// ordering. + struct UseListOrderStorage { + UseListOrderStorage(bool isIndexPairEncoding, + SmallVector &&indices) + : indices(std::move(indices)), + isIndexPairEncoding(isIndexPairEncoding){}; + /// The vector containing the information required to reorder the + /// use-list of a value. + SmallVector indices; + + /// Whether indices represent a pair of type `(src, dst)` or it is a direct + /// indexing, such as `dst = order[src]`. + bool isIndexPairEncoding; + }; + + /// Parse use-list order from bytecode for a range of values if available. The + /// range is expected to be either a block argument or an op result range. On + /// success, return a map of the position in the range and the use-list order + /// encoding. The function assumes to know the size of the range it is + /// processing. + using UseListMapT = DenseMap; + FailureOr parseUseListOrderForRange(EncodingReader &reader, + uint64_t rangeSize); + + /// Shuffle the use-chain according to the order parsed. + LogicalResult sortUseListOrder(Value value); + + /// Recursively visit all the values defined within topLevelOp and sort the + /// use-list orders according to the indices parsed. + LogicalResult processUseLists(Operation *topLevelOp); + //===--------------------------------------------------------------------===// // Fields @@ -1341,17 +1377,27 @@ /// The reader used to process resources within the bytecode. ResourceSectionReader resourceReader; + /// Worklist of values with custom use-list orders to process before the end + /// of the parsing. + DenseMap valueToUseListMap; + /// The table of strings referenced within the bytecode file. 
StringSectionReader stringReader; /// The current set of available IR value scopes. std::vector valueScopes; + + /// The global pre-order operation ordering. + DenseMap operationIDs; + /// A block containing the set of operations defined to create forward /// references. Block forwardRefOps; + /// A block containing previously created, and no longer used, forward /// reference operations. Block openForwardRefOps; + /// An operation state used when instantiating forward references. OperationState forwardRefOpState; @@ -1597,6 +1643,165 @@ dialectReader, bufferOwnerRef); } +//===----------------------------------------------------------------------===// +// UseListOrder Helpers + +FailureOr +BytecodeReader::Impl::parseUseListOrderForRange(EncodingReader &reader, + uint64_t numResults) { + BytecodeReader::Impl::UseListMapT map; + uint64_t numValuesToRead = 1; + if (numResults > 1 && failed(reader.parseVarInt(numValuesToRead))) + return failure(); + + for (size_t valueIdx = 0; valueIdx < numValuesToRead; valueIdx++) { + uint64_t resultIdx = 0; + if (numResults > 1 && failed(reader.parseVarInt(resultIdx))) + return failure(); + + uint64_t numValues; + bool indexPairEncoding; + if (failed(reader.parseVarIntWithFlag(numValues, indexPairEncoding))) + return failure(); + + SmallVector useListOrders; + for (size_t idx = 0; idx < numValues; idx++) { + uint64_t index; + if (failed(reader.parseVarInt(index))) + return failure(); + useListOrders.push_back(index); + } + + // Store in a map the result index + map.try_emplace(resultIdx, UseListOrderStorage(indexPairEncoding, + std::move(useListOrders))); + } + + return map; +} + +/// Sorts each use according to the order specified in the use-list parsed. If +/// the custom use-list is not found, this means that the order needs to be +/// consistent with the reverse pre-order walk of the IR. If multiple uses lie +/// on the same operation, the order will follow the reverse operand number +/// ordering. 
+LogicalResult BytecodeReader::Impl::sortUseListOrder(Value value) { + // Early return for trivial use-lists. + if (value.use_empty() || value.hasOneUse()) + return success(); + + bool hasIncomingOrder = + valueToUseListMap.contains(value.getAsOpaquePointer()); + + // Compute the current order of the use-list with respect to the global + // ordering. Detect if the order is already sorted while doing so. + bool alreadySorted = true; + auto &firstUse = *value.use_begin(); + uint64_t prevID = + bytecode::getUseID(firstUse, operationIDs.at(firstUse.getOwner())); + llvm::SmallVector> currentOrder = {{0, prevID}}; + for (auto item : llvm::drop_begin(llvm::enumerate(value.getUses()))) { + uint64_t currentID = bytecode::getUseID( + item.value(), operationIDs.at(item.value().getOwner())); + alreadySorted &= prevID > currentID; + currentOrder.push_back({item.index(), currentID}); + prevID = currentID; + } + + // If the order is already sorted, and there wasn't a custom order to apply + // from the bytecode file, we are done. + if (alreadySorted && !hasIncomingOrder) + return success(); + + // If not already sorted, sort the indices of the current order by descending + // useIDs. + if (!alreadySorted) + std::sort( + currentOrder.begin(), currentOrder.end(), + [](auto elem1, auto elem2) { return elem1.second > elem2.second; }); + + if (!hasIncomingOrder) { + // If the bytecode file did not contain any custom use-list order, it means + // that the order was descending useID. Hence, shuffle by the first index + // of the `currentOrder` pair. + SmallVector shuffle = SmallVector( + llvm::map_range(currentOrder, [&](auto item) { return item.first; })); + value.shuffleUseList(shuffle); + return success(); + } + + // Pull the custom order info from the map. 
+ UseListOrderStorage customOrder = + valueToUseListMap.at(value.getAsOpaquePointer()); + SmallVector shuffle = std::move(customOrder.indices); + uint64_t numUses = + std::distance(value.getUses().begin(), value.getUses().end()); + + // If the encoding was a pair of indices `(src, dst)` for every permutation, + // reconstruct the shuffle vector for every use. Initialize the shuffle vector + // as identity, and then apply the mapping encoded in the indices. + if (customOrder.isIndexPairEncoding) { + // Return failure if the number of indices was not representing pairs. + if (shuffle.size() & 1) + return failure(); + + SmallVector newShuffle(numUses); + size_t idx = 0; + std::iota(newShuffle.begin(), newShuffle.end(), idx); + for (idx = 0; idx < shuffle.size(); idx += 2) + newShuffle[shuffle[idx]] = shuffle[idx + 1]; + + shuffle = std::move(newShuffle); + } + + // Make sure that the indices represent a valid mapping. That is, the sum of + // all the values needs to be equal to (numUses - 1) * numUses / 2, and no + // duplicates are allowed in the list. + DenseSet set; + uint64_t accumulator = 0; + for (const auto &elem : shuffle) { + if (set.contains(elem)) + return failure(); + accumulator += elem; + set.insert(elem); + } + if (numUses != shuffle.size() || + accumulator != (((numUses - 1) * numUses) >> 1)) + return failure(); + + // Apply the current ordering map onto the shuffle vector to get the final + // use-list sorting indices before shuffling. + shuffle = SmallVector(llvm::map_range( + currentOrder, [&](auto item) { return shuffle[item.first]; })); + value.shuffleUseList(shuffle); + return success(); +} + +LogicalResult BytecodeReader::Impl::processUseLists(Operation *topLevelOp) { + // Precompute operation IDs according to the pre-order walk of the IR. We + // can't do this while parsing since parseRegions ordering is not strictly + // equal to the pre-order walk. 
+ unsigned operationID = 0; + topLevelOp->walk( + [&](Operation *op) { operationIDs.try_emplace(op, operationID++); }); + + auto blockWalk = topLevelOp->walk([this](Block *block) { + for (auto arg : block->getArguments()) + if (failed(sortUseListOrder(arg))) + return WalkResult::interrupt(); + return WalkResult::advance(); + }); + + auto resultWalk = topLevelOp->walk([this](Operation *op) { + for (auto result : op->getResults()) + if (failed(sortUseListOrder(result))) + return WalkResult::interrupt(); + return WalkResult::advance(); + }); + + return failure(blockWalk.wasInterrupted() || resultWalk.wasInterrupted()); +} + //===----------------------------------------------------------------------===// // IR Section @@ -1627,6 +1832,11 @@ "not all forward unresolved forward operand references"); } + // Sort use-lists according to what specified in bytecode. + if (failed(processUseLists(*moduleOp))) + return reader.emitError( + "parsed use-list orders were invalid and could not be applied"); + // Resolve dialect version. for (const BytecodeDialect &byteCodeDialect : dialects) { // Parsing is complete, give an opportunity to each dialect to visit the @@ -1812,6 +2022,17 @@ } } + /// Parse the use-list orders for the results of the operation. Use-list + /// orders are available since version 3 of the bytecode. + std::optional resultIdxToUseListMap = std::nullopt; + if (version > 2 && (opMask & bytecode::OpEncodingMask::kHasUseListOrders)) { + size_t numResults = opState.types.size(); + auto parseResult = parseUseListOrderForRange(reader, numResults); + if (failed(parseResult)) + return failure(); + resultIdxToUseListMap = std::move(*parseResult); + } + /// Parse the regions of the operation. 
if (opMask & bytecode::OpEncodingMask::kHasInlineRegions) { uint64_t numRegions; @@ -1831,6 +2052,16 @@ if (op->getNumResults() && failed(defineValues(reader, op->getResults()))) return failure(); + /// Store a map for every value that received a custom use-list order from the + /// bytecode file. + if (resultIdxToUseListMap.has_value()) { + for (size_t idx = 0; idx < op->getNumResults(); idx++) { + if (resultIdxToUseListMap->contains(idx)) { + valueToUseListMap.try_emplace(op->getResult(idx).getAsOpaquePointer(), + resultIdxToUseListMap->at(idx)); + } + } + } return op; } @@ -1880,6 +2111,28 @@ if (hasArgs && failed(parseBlockArguments(reader, &*readState.curBlock))) return failure(); + // Uselist orders are available since version 3 of the bytecode. + if (version < 3) + return success(); + + uint8_t hasUseListOrders = 0; + if (hasArgs && failed(reader.parseByte(hasUseListOrders))) + return failure(); + + if (!hasUseListOrders) + return success(); + + Block &blk = *readState.curBlock; + auto argIdxToUseListMap = + parseUseListOrderForRange(reader, blk.getNumArguments()); + if (failed(argIdxToUseListMap) || argIdxToUseListMap->empty()) + return failure(); + + for (size_t idx = 0; idx < blk.getNumArguments(); idx++) + if (argIdxToUseListMap->contains(idx)) + valueToUseListMap.try_emplace(blk.getArgument(idx).getAsOpaquePointer(), + argIdxToUseListMap->at(idx)); + // We don't parse the operations of the block here, that's done elsewhere. 
return success(); } diff --git a/mlir/lib/Bytecode/Writer/BytecodeWriter.cpp b/mlir/lib/Bytecode/Writer/BytecodeWriter.cpp --- a/mlir/lib/Bytecode/Writer/BytecodeWriter.cpp +++ b/mlir/lib/Bytecode/Writer/BytecodeWriter.cpp @@ -7,9 +7,9 @@ //===----------------------------------------------------------------------===// #include "mlir/Bytecode/BytecodeWriter.h" -#include "../Encoding.h" #include "IRNumbering.h" #include "mlir/Bytecode/BytecodeImplementation.h" +#include "mlir/Bytecode/Encoding.h" #include "mlir/IR/OpImplementation.h" #include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/MapVector.h" @@ -470,6 +470,12 @@ void writeStringSection(EncodingEmitter &emitter); + //===--------------------------------------------------------------------===// + // Helpers + + void writeUseListOrders(EncodingEmitter &emitter, uint8_t &opEncodingMask, + ValueRange range); + //===--------------------------------------------------------------------===// // Fields @@ -667,6 +673,14 @@ emitter.emitVarInt(numberingState.getNumber(arg.getType())); emitter.emitVarInt(numberingState.getNumber(arg.getLoc())); } + if (config.bytecodeVersion > 2) { + uint64_t maskOffset = emitter.size(); + uint8_t encodingMask = 0; + emitter.emitByte(0); + writeUseListOrders(emitter, encodingMask, args); + if (encodingMask) + emitter.patchByte(maskOffset, encodingMask); + } } // Emit the operations within the block. @@ -718,6 +732,11 @@ emitter.emitVarInt(numberingState.getNumber(successor)); } + // Emit the use-list orders to bytecode, so we can reconstruct the same order + // at parsing. + if (config.bytecodeVersion > 2) + writeUseListOrders(emitter, opEncodingMask, ValueRange(op->getResults())); + // Check for regions. unsigned numRegions = op->getNumRegions(); if (numRegions) @@ -749,6 +768,94 @@ } } +void BytecodeWriter::writeUseListOrders(EncodingEmitter &emitter, + uint8_t &opEncodingMask, + ValueRange range) { + // Loop over the results and store the use-list order per result index. 
+ DenseMap> map; + for (auto item : llvm::enumerate(range)) { + auto value = item.value(); + // No need to store a custom use-list order if the result does not have + // multiple uses. + if (value.use_empty() || value.hasOneUse()) + continue; + + // For each result, assemble the list of pairs (use-list-index, + // global-value-index). While doing so, detect if the global-value-index is + // already ordered with respect to the use-list-index. + bool alreadyOrdered = true; + auto &firstUse = *value.use_begin(); + uint64_t prevID = bytecode::getUseID( + firstUse, numberingState.getNumber(firstUse.getOwner())); + llvm::SmallVector> useListPairs( + {{0, prevID}}); + + for (auto use : llvm::drop_begin(llvm::enumerate(value.getUses()))) { + uint64_t currentID = bytecode::getUseID( + use.value(), numberingState.getNumber(use.value().getOwner())); + // The use-list order achieved when building the IR at parsing always + // pushes new uses on front. Hence, if the order by unique ID is + // monotonically decreasing, a roundtrip to bytecode preserves such order. + alreadyOrdered &= (prevID > currentID); + useListPairs.push_back({use.index(), currentID}); + prevID = currentID; + } + + // Do not emit if the order is already sorted. + if (alreadyOrdered) + continue; + + // Sort the use indices by the unique ID indices in descending order. + std::sort( + useListPairs.begin(), useListPairs.end(), + [](auto elem1, auto elem2) { return elem1.second > elem2.second; }); + + map.try_emplace(item.index(), llvm::map_range(useListPairs, [](auto elem) { + return elem.first; + })); + } + + if (map.empty()) + return; + + opEncodingMask |= bytecode::OpEncodingMask::kHasUseListOrders; + // Emit the number of results that have a custom use-list order if the number + // of results is greater than one. 
+ if (range.size() != 1) + emitter.emitVarInt(map.size()); + + for (const auto &item : map) { + auto resultIdx = item.getFirst(); + auto useListOrder = item.getSecond(); + + // Compute the number of uses that are actually shuffled. If those are less + // than half of the total uses, encoding the index pair `(src, dst)` is more + // space efficient. + size_t shuffledElements = + llvm::count_if(llvm::enumerate(useListOrder), + [](auto item) { return item.index() != item.value(); }); + bool indexPairEncoding = shuffledElements < (useListOrder.size() / 2); + + // For single result, we don't need to store the result index. + if (range.size() != 1) + emitter.emitVarInt(resultIdx); + + if (indexPairEncoding) { + emitter.emitVarIntWithFlag(shuffledElements * 2, indexPairEncoding); + for (auto pair : llvm::enumerate(useListOrder)) { + if (pair.index() != pair.value()) { + emitter.emitVarInt(pair.value()); + emitter.emitVarInt(pair.index()); + } + } + } else { + emitter.emitVarIntWithFlag(useListOrder.size(), indexPairEncoding); + for (const auto &index : useListOrder) + emitter.emitVarInt(index); + } + } +} + void BytecodeWriter::writeRegion(EncodingEmitter &emitter, Region *region) { // If the region is empty, we only need to emit the number of blocks (which is // zero). diff --git a/mlir/lib/Bytecode/Writer/IRNumbering.h b/mlir/lib/Bytecode/Writer/IRNumbering.h --- a/mlir/lib/Bytecode/Writer/IRNumbering.h +++ b/mlir/lib/Bytecode/Writer/IRNumbering.h @@ -152,6 +152,10 @@ assert(blockIDs.count(block) && "block not numbered"); return blockIDs[block]; } + unsigned getNumber(Operation *op) { + assert(operationIDs.count(op) && "operation not numbered"); + return operationIDs[op]; + } unsigned getNumber(OperationName opName) { assert(opNames.count(opName) && "opName not numbered"); return opNames[opName]->number; @@ -224,7 +228,8 @@ llvm::SpecificBumpPtrAllocator resourceAllocator; llvm::SpecificBumpPtrAllocator typeAllocator; - /// The value ID for each Block and Value. 
+ /// The value ID for each Operation, Block and Value. + DenseMap operationIDs; DenseMap blockIDs; DenseMap valueIDs; diff --git a/mlir/lib/Bytecode/Writer/IRNumbering.cpp b/mlir/lib/Bytecode/Writer/IRNumbering.cpp --- a/mlir/lib/Bytecode/Writer/IRNumbering.cpp +++ b/mlir/lib/Bytecode/Writer/IRNumbering.cpp @@ -7,9 +7,7 @@ //===----------------------------------------------------------------------===// #include "IRNumbering.h" -#include "../Encoding.h" #include "mlir/Bytecode/BytecodeImplementation.h" -#include "mlir/Bytecode/BytecodeWriter.h" #include "mlir/IR/AsmState.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/OpDefinition.h" @@ -109,6 +107,12 @@ } IRNumberingState::IRNumberingState(Operation *op) { + // Compute a global operation ID numbering according to the pre-order walk of + // the IR. This is used as reference to construct use-list orders. + unsigned operationID = 0; + op->walk( + [&](Operation *op) { operationIDs.try_emplace(op, operationID++); }); + // Number the root operation. 
number(*op); diff --git a/mlir/lib/Conversion/LLVMCommon/Pattern.cpp b/mlir/lib/Conversion/LLVMCommon/Pattern.cpp --- a/mlir/lib/Conversion/LLVMCommon/Pattern.cpp +++ b/mlir/lib/Conversion/LLVMCommon/Pattern.cpp @@ -121,7 +121,7 @@ void ConvertToLLVMPattern::getMemRefDescriptorSizes( Location loc, MemRefType memRefType, ValueRange dynamicSizes, ConversionPatternRewriter &rewriter, SmallVectorImpl &sizes, - SmallVectorImpl &strides, Value &sizeBytes) const { + SmallVectorImpl &strides, Value &size, bool sizeInBytes) const { assert(isConvertibleAndHasIdentityMaps(memRefType) && "layout maps must have been normalized away"); assert(count(memRefType.getShape(), ShapedType::kDynamic) == @@ -143,14 +143,14 @@ for (auto i = memRefType.getRank(); i-- > 0;) { strides[i] = runningStride; - int64_t size = memRefType.getShape()[i]; - if (size == 0) + int64_t staticSize = memRefType.getShape()[i]; + if (staticSize == 0) continue; bool useSizeAsStride = stride == 1; - if (size == ShapedType::kDynamic) + if (staticSize == ShapedType::kDynamic) stride = ShapedType::kDynamic; if (stride != ShapedType::kDynamic) - stride *= size; + stride *= staticSize; if (useSizeAsStride) runningStride = sizes[i]; @@ -160,14 +160,17 @@ else runningStride = createIndexConstant(rewriter, loc, stride); } - - // Buffer size in bytes. - Type elementType = typeConverter->convertType(memRefType.getElementType()); - Type elementPtrType = getTypeConverter()->getPointerType(elementType); - Value nullPtr = rewriter.create(loc, elementPtrType); - Value gepPtr = rewriter.create(loc, elementPtrType, elementType, - nullPtr, runningStride); - sizeBytes = rewriter.create(loc, getIndexType(), gepPtr); + if (sizeInBytes) { + // Buffer size in bytes. 
+ Type elementType = typeConverter->convertType(memRefType.getElementType()); + Type elementPtrType = getTypeConverter()->getPointerType(elementType); + Value nullPtr = rewriter.create(loc, elementPtrType); + Value gepPtr = rewriter.create( + loc, elementPtrType, elementType, nullPtr, runningStride); + size = rewriter.create(loc, getIndexType(), gepPtr); + } else { + size = runningStride; + } } Value ConvertToLLVMPattern::getSizeInBytes( @@ -186,13 +189,30 @@ } Value ConvertToLLVMPattern::getNumElements( - Location loc, ArrayRef shape, + Location loc, MemRefType memRefType, ValueRange dynamicSizes, ConversionPatternRewriter &rewriter) const { + assert(count(memRefType.getShape(), ShapedType::kDynamic) == + static_cast(dynamicSizes.size()) && + "dynamicSizes size doesn't match dynamic sizes count in memref shape"); + + Value numElements = memRefType.getRank() == 0 + ? createIndexConstant(rewriter, loc, 1) + : nullptr; + unsigned dynamicIndex = 0; + // Compute the total number of memref elements. - Value numElements = - shape.empty() ? createIndexConstant(rewriter, loc, 1) : shape.front(); - for (unsigned i = 1, e = shape.size(); i < e; ++i) - numElements = rewriter.create(loc, numElements, shape[i]); + for (int64_t staticSize : memRefType.getShape()) { + if (numElements) { + Value size = staticSize == ShapedType::kDynamic + ? dynamicSizes[dynamicIndex++] + : createIndexConstant(rewriter, loc, staticSize); + numElements = rewriter.create(loc, numElements, size); + } else { + numElements = staticSize == ShapedType::kDynamic + ? 
dynamicSizes[dynamicIndex++] + : createIndexConstant(rewriter, loc, staticSize); + } + } return numElements; } diff --git a/mlir/lib/Conversion/MemRefToLLVM/AllocLikeConversion.cpp b/mlir/lib/Conversion/MemRefToLLVM/AllocLikeConversion.cpp --- a/mlir/lib/Conversion/MemRefToLLVM/AllocLikeConversion.cpp +++ b/mlir/lib/Conversion/MemRefToLLVM/AllocLikeConversion.cpp @@ -156,6 +156,10 @@ elementPtrType, *getTypeConverter()); } +void AllocLikeOpLLVMLowering::setRequiresNumElements() { + requiresNumElements = true; +} + LogicalResult AllocLikeOpLLVMLowering::matchAndRewrite( Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const { @@ -169,13 +173,14 @@ // zero-dimensional memref, assume a scalar (size 1). SmallVector sizes; SmallVector strides; - Value sizeBytes; + Value size; + this->getMemRefDescriptorSizes(loc, memRefType, operands, rewriter, sizes, - strides, sizeBytes); + strides, size, !requiresNumElements); // Allocate the underlying buffer. auto [allocatedPtr, alignedPtr] = - this->allocateBuffer(rewriter, loc, sizeBytes, op); + this->allocateBuffer(rewriter, loc, size, op); // Create the MemRef descriptor. auto memRefDescriptor = this->createMemRefDescriptor( diff --git a/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp b/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp --- a/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp +++ b/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp @@ -85,13 +85,15 @@ struct AllocaOpLowering : public AllocLikeOpLLVMLowering { AllocaOpLowering(LLVMTypeConverter &converter) : AllocLikeOpLLVMLowering(memref::AllocaOp::getOperationName(), - converter) {} + converter) { + setRequiresNumElements(); + } /// Allocates the underlying buffer using the right call. `allocatedBytePtr` /// is set to null for stack allocations. `accessAlignment` is set if /// alignment is needed post allocation (for eg. in conjunction with malloc). 
std::tuple allocateBuffer(ConversionPatternRewriter &rewriter, - Location loc, Value sizeBytes, + Location loc, Value size, Operation *op) const override { // With alloca, one gets a pointer to the element type right away. @@ -104,9 +106,9 @@ auto elementPtrType = getTypeConverter()->getPointerType(elementType, addrSpace); - auto allocatedElementPtr = rewriter.create( - loc, elementPtrType, elementType, sizeBytes, - allocaOp.getAlignment().value_or(0)); + auto allocatedElementPtr = + rewriter.create(loc, elementPtrType, elementType, size, + allocaOp.getAlignment().value_or(0)); return std::make_tuple(allocatedElementPtr, allocatedElementPtr); } diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMInlining.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMInlining.cpp --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMInlining.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMInlining.cpp @@ -245,16 +245,6 @@ targetAlignment); } -/// Returns true if the given argument or result attribute is supported by the -/// inliner, false otherwise. -static bool isArgOrResAttrSupported(NamedAttribute attr) { - if (attr.getName() == LLVM::LLVMDialect::getInAllocaAttrName()) - return false; - if (attr.getName() == LLVM::LLVMDialect::getNoAliasAttrName()) - return false; - return true; -} - namespace { struct LLVMInlinerInterface : public DialectInlinerInterface { using DialectInlinerInterface::DialectInlinerInterface; @@ -287,27 +277,13 @@ << "Cannot inline: callable is not an LLVM::LLVMFuncOp\n"); return false; } + // TODO: Generate aliasing metadata from noalias argument/result attributes. 
if (auto attrs = funcOp.getArgAttrs()) { for (DictionaryAttr attrDict : attrs->getAsRange()) { - for (NamedAttribute attr : attrDict) { - if (!isArgOrResAttrSupported(attr)) { - LLVM_DEBUG(llvm::dbgs() << "Cannot inline " << funcOp.getSymName() - << ": unhandled argument attribute " - << attr.getName() << "\n"); - return false; - } - } - } - } - if (auto attrs = funcOp.getResAttrs()) { - for (DictionaryAttr attrDict : attrs->getAsRange()) { - for (NamedAttribute attr : attrDict) { - if (!isArgOrResAttrSupported(attr)) { - LLVM_DEBUG(llvm::dbgs() << "Cannot inline " << funcOp.getSymName() - << ": unhandled return attribute " - << attr.getName() << "\n"); - return false; - } + if (attrDict.contains(LLVM::LLVMDialect::getInAllocaAttrName())) { + LLVM_DEBUG(llvm::dbgs() << "Cannot inline " << funcOp.getSymName() + << ": inalloca arguments not supported\n"); + return false; } } } @@ -364,6 +340,7 @@ } // clang-format off if (isa LLVM::AllocaOp::getDestructurableSlots() { + if (!mlir::matchPattern(getArraySize(), m_One())) + return {}; + + Type elemType = getAllocaElementType(*this); + auto destructurable = dyn_cast(elemType); + if (!destructurable) + return {}; + + std::optional> destructuredType = + destructurable.getSubelementIndexMap(); + if (!destructuredType) + return {}; + + DenseMap allocaTypeMap; + for (Attribute index : llvm::make_first_range(destructuredType.value())) + allocaTypeMap.insert({index, LLVM::LLVMPointerType::get(getContext())}); + + return {DestructurableMemorySlot{{getResult(), elemType}, {allocaTypeMap}}}; +} + +DenseMap +LLVM::AllocaOp::destructure(const DestructurableMemorySlot &slot, + const SmallPtrSetImpl &usedIndices, + RewriterBase &rewriter) { + assert(slot.ptr == getResult()); + Type elemType = + getElemType() ? 
*getElemType() : getResult().getType().getElementType(); + + rewriter.setInsertionPointAfter(*this); + + auto destructurableType = cast(elemType); + DenseMap slotMap; + for (Attribute index : usedIndices) { + Type elemType = destructurableType.getTypeAtIndex(index); + assert(elemType && "used index must exist"); + auto subAlloca = rewriter.create( + getLoc(), LLVM::LLVMPointerType::get(getContext()), elemType, + getArraySize()); + slotMap.try_emplace(index, {subAlloca.getResult(), elemType}); + } + + return slotMap; +} + +void LLVM::AllocaOp::handleDestructuringComplete( + const DestructurableMemorySlot &slot, RewriterBase &rewriter) { + assert(slot.ptr == getResult()); + rewriter.eraseOp(*this); +} + //===----------------------------------------------------------------------===// // Interfaces for LoadOp/StoreOp //===----------------------------------------------------------------------===// @@ -115,12 +170,23 @@ // `canUsesBeRemoved` checked this blocking use must be the stored slot // pointer. 
for (Operation *user : slot.ptr.getUsers()) - if (auto declareOp = llvm::dyn_cast(user)) + if (auto declareOp = dyn_cast(user)) rewriter.create(declareOp->getLoc(), getValue(), declareOp.getVarInfo()); return DeletionKind::Delete; } +LogicalResult LLVM::LoadOp::ensureOnlySafeAccesses( + const MemorySlot &slot, SmallVectorImpl &mustBeSafelyUsed) { + return success(getAddr() != slot.ptr || getType() == slot.elemType); +} + +LogicalResult LLVM::StoreOp::ensureOnlySafeAccesses( + const MemorySlot &slot, SmallVectorImpl &mustBeSafelyUsed) { + return success(getAddr() != slot.ptr || + getValue().getType() == slot.elemType); +} + //===----------------------------------------------------------------------===// // Interfaces for discardable OPs //===----------------------------------------------------------------------===// @@ -189,6 +255,10 @@ return DeletionKind::Delete; } +//===----------------------------------------------------------------------===// +// Interfaces for GEPOp +//===----------------------------------------------------------------------===// + static bool hasAllZeroIndices(LLVM::GEPOp gepOp) { return llvm::all_of(gepOp.getIndices(), [](auto index) { auto indexAttr = index.template dyn_cast(); @@ -196,10 +266,6 @@ }); } -//===----------------------------------------------------------------------===// -// Interfaces for GEPOp -//===----------------------------------------------------------------------===// - bool LLVM::GEPOp::canUsesBeRemoved( const SmallPtrSetImpl &blockingUses, SmallVectorImpl &newBlockingUses) { @@ -213,3 +279,171 @@ const SmallPtrSetImpl &blockingUses, RewriterBase &rewriter) { return DeletionKind::Delete; } + +/// Returns the type the resulting pointer of the GEP points to. If such a type +/// is not clear, returns null type. +static Type computeReachedGEPType(LLVM::GEPOp gep) { + if (gep.getIndices().empty()) + return {}; + + // Ensures all indices are static and fetches them. 
+ SmallVector indices; + for (auto index : gep.getIndices()) { + IntegerAttr indexInt = index.dyn_cast(); + if (!indexInt) + return {}; + indices.push_back(indexInt); + } + + // Check the pointer indexing only targets the first element. + if (indices[0].getInt() != 0) + return {}; + + // Set the initial type currently being used for indexing. This will be + // updated as the indices get walked over. + std::optional maybeSelectedType = gep.getElemType(); + if (!maybeSelectedType) + return {}; + Type selectedType = *maybeSelectedType; + + // Follow the indexed elements in the gep. + for (IntegerAttr index : llvm::drop_begin(indices)) { + // Ensure the structure of the type being indexed can be reasoned about. + // This includes rejecting any potential typed pointer. + auto destructurable = selectedType.dyn_cast(); + if (!destructurable) + return {}; + + // Follow the type at the index the gep is accessing, making it the new type + // used for indexing. + Type field = destructurable.getTypeAtIndex(index); + if (!field) + return {}; + selectedType = field; + } + + // When there are no more indices, the type currently being used for indexing + // is the type of the value pointed at by the returned indexed pointer. + return selectedType; +} + +LogicalResult LLVM::GEPOp::ensureOnlySafeAccesses( + const MemorySlot &slot, SmallVectorImpl &mustBeSafelyUsed) { + if (getBase() != slot.ptr) + return success(); + if (slot.elemType != getElemType()) + return failure(); + Type reachedType = computeReachedGEPType(*this); + if (!reachedType) + return failure(); + mustBeSafelyUsed.emplace_back({getResult(), reachedType}); + return success(); +} + +bool LLVM::GEPOp::canRewire(const DestructurableMemorySlot &slot, + SmallPtrSetImpl &usedIndices, + SmallVectorImpl &mustBeSafelyUsed) { + auto basePtrType = getBase().getType().dyn_cast(); + if (!basePtrType) + return false; + + // Typed pointers are not supported. 
This should be removed once typed + // pointers are removed from the LLVM dialect. + if (!basePtrType.isOpaque()) + return false; + + if (getBase() != slot.ptr || slot.elemType != getElemType()) + return false; + Type reachedType = computeReachedGEPType(*this); + if (!reachedType || getIndices().size() < 2) + return false; + auto firstLevelIndex = cast(getIndices()[1]); + assert(slot.elementPtrs.contains(firstLevelIndex)); + if (!slot.elementPtrs.at(firstLevelIndex).isa()) + return false; + mustBeSafelyUsed.emplace_back({getResult(), reachedType}); + usedIndices.insert(firstLevelIndex); + return true; +} + +DeletionKind LLVM::GEPOp::rewire(const DestructurableMemorySlot &slot, + DenseMap &subslots, + RewriterBase &rewriter) { + IntegerAttr firstLevelIndex = getIndices()[1].dyn_cast(); + const MemorySlot &newSlot = subslots.at(firstLevelIndex); + + ArrayRef remainingIndices = getRawConstantIndices().slice(2); + + // If the GEP would become trivial after this transformation, eliminate it. + // A GEP should only be eliminated if it has no indices (except the first + // pointer index), as simplifying GEPs with all-zero indices would eliminate + // structure information useful for further destruction. + if (remainingIndices.empty()) { + rewriter.replaceAllUsesWith(getResult(), newSlot.ptr); + return DeletionKind::Delete; + } + + rewriter.updateRootInPlace(*this, [&]() { + // Rewire the indices by popping off the second index. + // Start with a single zero, then add the indices beyond the second. + SmallVector newIndices(1); + newIndices.append(remainingIndices.begin(), remainingIndices.end()); + setRawConstantIndices(newIndices); + + // Rewire the pointed type. + setElemType(newSlot.elemType); + + // Rewire the pointer. 
+ getBaseMutable().assign(newSlot.ptr); + }); + + return DeletionKind::Keep; +} + +//===----------------------------------------------------------------------===// +// Interfaces for destructurable types +//===----------------------------------------------------------------------===// + +std::optional> +LLVM::LLVMStructType::getSubelementIndexMap() { + Type i32 = IntegerType::get(getContext(), 32); + DenseMap destructured; + for (const auto &[index, elemType] : llvm::enumerate(getBody())) + destructured.insert({IntegerAttr::get(i32, index), elemType}); + return destructured; +} + +Type LLVM::LLVMStructType::getTypeAtIndex(Attribute index) { + auto indexAttr = index.dyn_cast(); + if (!indexAttr || !indexAttr.getType().isInteger(32)) + return {}; + int32_t indexInt = indexAttr.getInt(); + ArrayRef body = getBody(); + if (indexInt < 0 || body.size() <= static_cast(indexInt)) + return {}; + return body[indexInt]; +} + +std::optional> +LLVM::LLVMArrayType::getSubelementIndexMap() const { + constexpr size_t maxArraySizeForDestructuring = 16; + if (getNumElements() > maxArraySizeForDestructuring) + return {}; + int32_t numElements = getNumElements(); + + Type i32 = IntegerType::get(getContext(), 32); + DenseMap destructured; + for (int32_t index = 0; index < numElements; ++index) + destructured.insert({IntegerAttr::get(i32, index), getElementType()}); + return destructured; +} + +Type LLVM::LLVMArrayType::getTypeAtIndex(Attribute index) const { + auto indexAttr = index.dyn_cast(); + if (!indexAttr || !indexAttr.getType().isInteger(32)) + return {}; + int32_t indexInt = indexAttr.getInt(); + if (indexInt < 0 || getNumElements() <= static_cast(indexInt)) + return {}; + return getElementType(); +} diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp --- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp @@ -436,6 +436,43 @@ return success(); } 
+//===----------------------------------------------------------------------===// +// Custom parser and printer verifier for private clause +//===----------------------------------------------------------------------===// + +static ParseResult parsePrivatizationList( + mlir::OpAsmParser &parser, + llvm::SmallVectorImpl &operands, + llvm::SmallVectorImpl &types, mlir::ArrayAttr &privatizationSymbols) { + llvm::SmallVector privatizationVec; + if (failed(parser.parseCommaSeparatedList([&]() { + if (parser.parseAttribute(privatizationVec.emplace_back()) || + parser.parseArrow() || + parser.parseOperand(operands.emplace_back()) || + parser.parseColonType(types.emplace_back())) + return failure(); + return success(); + }))) + return failure(); + llvm::SmallVector privatizations(privatizationVec.begin(), + privatizationVec.end()); + privatizationSymbols = ArrayAttr::get(parser.getContext(), privatizations); + return success(); +} + +static void +printPrivatizationList(mlir::OpAsmPrinter &p, mlir::Operation *op, + mlir::OperandRange privateOperands, + mlir::TypeRange privateTypes, + std::optional privatizations) { + for (unsigned i = 0, e = privatizations->size(); i < e; ++i) { + if (i != 0) + p << ", "; + p << (*privatizations)[i] << " -> " << privateOperands[i] << " : " + << privateOperands[i].getType(); + } +} + //===----------------------------------------------------------------------===// // ParallelOp //===----------------------------------------------------------------------===// @@ -455,6 +492,45 @@ return success(); } +static LogicalResult +checkPrivatizationList(Operation *op, + std::optional privatizations, + mlir::OperandRange privateOperands) { + if (!privateOperands.empty()) { + if (!privatizations || privatizations->size() != privateOperands.size()) + return op->emitOpError() << "expected as many privatizations symbol " + "reference as private operands"; + } else { + if (privatizations) + return op->emitOpError() << "unexpected privatizations symbol 
reference"; + return success(); + } + + llvm::DenseSet privates; + for (auto args : llvm::zip(privateOperands, *privatizations)) { + mlir::Value privateOperand = std::get<0>(args); + + if (!privates.insert(privateOperand).second) + return op->emitOpError() << "private operand appears more than once"; + + mlir::Type varType = privateOperand.getType(); + auto symbolRef = std::get<1>(args).cast(); + auto decl = + SymbolTable::lookupNearestSymbolFrom(op, symbolRef); + if (!decl) + return op->emitOpError() << "expected symbol reference " << symbolRef + << " to point to a private declaration"; + + if (decl.getType() && decl.getType() != varType) + return op->emitOpError() + << "expected private (" << varType + << ") to be the same type as private declaration (" + << decl.getType() << ")"; + } + + return success(); +} + unsigned ParallelOp::getNumDataOperands() { return getReductionOperands().size() + getGangPrivateOperands().size() + getGangFirstPrivateOperands().size() + getDataClauseOperands().size(); @@ -471,6 +547,9 @@ } LogicalResult acc::ParallelOp::verify() { + if (failed(checkPrivatizationList(*this, getPrivatizations(), + getGangPrivateOperands()))) + return failure(); return checkDataOperands(*this, getDataClauseOperands()); } @@ -647,6 +726,10 @@ if (getSeq() && (getHasGang() || getHasWorker() || getHasVector())) return emitError("gang, worker or vector cannot appear with the seq attr"); + if (failed(checkPrivatizationList(*this, getPrivatizations(), + getPrivateOperands()))) + return failure(); + // Check non-empty body(). 
if (getRegion().empty()) return emitError("expected non-empty body."); diff --git a/mlir/lib/Dialect/OpenMP/CMakeLists.txt b/mlir/lib/Dialect/OpenMP/CMakeLists.txt --- a/mlir/lib/Dialect/OpenMP/CMakeLists.txt +++ b/mlir/lib/Dialect/OpenMP/CMakeLists.txt @@ -12,4 +12,5 @@ LINK_LIBS PUBLIC MLIRIR MLIRLLVMDialect + MLIRFuncDialect ) diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "mlir/Dialect/OpenMP/OpenMPDialect.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/LLVMIR/LLVMTypes.h" #include "mlir/IR/Attributes.h" #include "mlir/IR/DialectImplementation.h" @@ -71,8 +72,23 @@ MemRefType::attachInterface>(*getContext()); LLVM::LLVMPointerType::attachInterface< PointerLikeModel>(*getContext()); + + // Attach default offload module interface to module op to access + // offload functionality through mlir::ModuleOp::attachInterface( *getContext()); + + // Attach default declare target interfaces to operations which can be marked + // as declare target (Global Operations and Functions/Subroutines in dialects + // that Fortran (or other languages that lower to MLIR) translates too + mlir::LLVM::GlobalOp::attachInterface< + mlir::omp::DeclareTargetDefaultModel>( + *getContext()); + mlir::LLVM::LLVMFuncOp::attachInterface< + mlir::omp::DeclareTargetDefaultModel>( + *getContext()); + mlir::func::FuncOp::attachInterface< + mlir::omp::DeclareTargetDefaultModel>(*getContext()); } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp --- a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp +++ 
b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp @@ -1087,6 +1087,54 @@ } }; +/// Bufferization of tensor.splat. Bufferizes to a new allocation that is filled +/// with a linalg.map. Similar to tensor.generate. +struct SplatOpInterface + : public BufferizableOpInterface::ExternalModel { + + bool bufferizesToAllocation(Operation *op, OpResult opResult) const { + return true; + } + + LogicalResult bufferize(Operation *op, RewriterBase &rewriter, + const BufferizationOptions &options) const { + OpBuilder::InsertionGuard g(rewriter); + auto splatOp = cast(op); + + // Should the buffer be deallocated? + bool dealloc = + shouldDeallocateOpResult(cast(splatOp.getResult()), options); + + // TODO: Implement memory space for this op. + if (options.defaultMemorySpace != Attribute()) + return op->emitError("memory space not implemented yet"); + + // Allocate memory. + Location loc = op->getLoc(); + FailureOr tensorAlloc = + allocateTensorForShapedValue(rewriter, loc, splatOp.getResult(), + /*escape=*/!dealloc, options, + /*copy=*/false); + if (failed(tensorAlloc)) + return failure(); + + // Create linalg::MapOp. + auto tensorType = cast(tensorAlloc->getType()); + auto linalgOp = + rewriter.create(loc, tensorType, /*inputs=*/ValueRange(), + /*init=*/*tensorAlloc); + Block &linalgBody = linalgOp.getMapper().emplaceBlock(); + + // Create linalg::IndexOps. + rewriter.setInsertionPointToStart(&linalgBody); + rewriter.create(loc, splatOp.getInput()); + rewriter.replaceOp(splatOp, linalgOp.getResult()[0]); + + return success(); + } +}; + } // namespace } // namespace tensor } // namespace mlir @@ -1110,6 +1158,7 @@ *ctx); RankOp::attachInterface(*ctx); ReshapeOp::attachInterface(*ctx); + SplatOp::attachInterface(*ctx); // Load additional dialects of which ops may get created. 
ctx->loadDialect(); diff --git a/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp b/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp --- a/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp +++ b/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp @@ -1027,3 +1027,13 @@ return {}; } + +OpFoldResult tosa::AbsOp::fold(FoldAdaptor adaptor) { + auto input = getInput1(); + // Element-wise abs(abs(x)) = abs(x) + if (auto op = input.getDefiningOp()) { + return input; + } + + return {}; +} diff --git a/mlir/lib/IR/Value.cpp b/mlir/lib/IR/Value.cpp --- a/mlir/lib/IR/Value.cpp +++ b/mlir/lib/IR/Value.cpp @@ -93,6 +93,11 @@ }); } +/// Shuffles the use-list order according to the provided indices. +void Value::shuffleUseList(ArrayRef indices) { + getImpl()->shuffleUseList(indices); +} + //===----------------------------------------------------------------------===// // OpResult //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Interfaces/MemorySlotInterfaces.cpp b/mlir/lib/Interfaces/MemorySlotInterfaces.cpp --- a/mlir/lib/Interfaces/MemorySlotInterfaces.cpp +++ b/mlir/lib/Interfaces/MemorySlotInterfaces.cpp @@ -9,3 +9,4 @@ #include "mlir/Interfaces/MemorySlotInterfaces.h" #include "mlir/Interfaces/MemorySlotOpInterfaces.cpp.inc" +#include "mlir/Interfaces/MemorySlotTypeInterfaces.cpp.inc" diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -694,6 +694,11 @@ return taskOp.emitError("unhandled clauses for translation to LLVM IR"); } auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) { + // Save the alloca insertion point on ModuleTranslation stack for use in + // nested regions. 
+ LLVM::ModuleTranslation::SaveStack frame( + moduleTranslation, allocaIP); + builder.restoreIP(codegenIP); convertOmpOpRegions(taskOp.getRegion(), "omp.task.region", builder, moduleTranslation, bodyGenStatus); diff --git a/mlir/lib/Transforms/CMakeLists.txt b/mlir/lib/Transforms/CMakeLists.txt --- a/mlir/lib/Transforms/CMakeLists.txt +++ b/mlir/lib/Transforms/CMakeLists.txt @@ -12,6 +12,7 @@ OpStats.cpp PrintIR.cpp SCCP.cpp + SROA.cpp StripDebugInfo.cpp SymbolDCE.cpp SymbolPrivatize.cpp diff --git a/mlir/lib/Transforms/Mem2Reg.cpp b/mlir/lib/Transforms/Mem2Reg.cpp --- a/mlir/lib/Transforms/Mem2Reg.cpp +++ b/mlir/lib/Transforms/Mem2Reg.cpp @@ -578,8 +578,6 @@ LogicalResult mlir::tryToPromoteMemorySlots( ArrayRef allocators, RewriterBase &rewriter, Mem2RegStatistics statistics) { - DominanceInfo dominance; - bool promotedAny = false; for (PromotableAllocationOpInterface allocator : allocators) { diff --git a/mlir/lib/Transforms/SROA.cpp b/mlir/lib/Transforms/SROA.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/Transforms/SROA.cpp @@ -0,0 +1,235 @@ +//===-- SROA.cpp - Scalar Replacement Of Aggregates -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Transforms/SROA.h" +#include "mlir/Analysis/SliceAnalysis.h" +#include "mlir/Interfaces/MemorySlotInterfaces.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "mlir/Transforms/Passes.h" + +namespace mlir { +#define GEN_PASS_DEF_SROA +#include "mlir/Transforms/Passes.h.inc" +} // namespace mlir + +#define DEBUG_TYPE "sroa" + +using namespace mlir; + +namespace { + +/// Information computed by destructurable memory slot analysis used to perform +/// actual destructuring of the slot. 
This struct is only constructed if +/// destructuring is possible, and contains the necessary data to perform it. +struct MemorySlotDestructuringInfo { + /// Set of the indices that are actually used when accessing the subelements. + SmallPtrSet usedIndices; + /// Blocking uses of a given user of the memory slot that must be eliminated. + DenseMap> userToBlockingUses; + /// List of potentially indirect accessors of the memory slot that need + /// rewiring. + SmallVector accessors; +}; + +} // namespace + +/// Computes information for slot destructuring. This will compute whether this +/// slot can be destructured and data to perform the destructuring. Returns +/// nothing if the slot cannot be destructured or if there is no useful work to +/// be done. +static std::optional +computeDestructuringInfo(DestructurableMemorySlot &slot) { + assert(isa(slot.elemType)); + + if (slot.ptr.use_empty()) + return {}; + + MemorySlotDestructuringInfo info; + + SmallVector usedSafelyWorklist; + + auto scheduleAsBlockingUse = [&](OpOperand &use) { + SmallPtrSetImpl &blockingUses = + info.userToBlockingUses.getOrInsertDefault(use.getOwner()); + blockingUses.insert(&use); + }; + + // Initialize the analysis with the immediate users of the slot. + for (OpOperand &use : slot.ptr.getUses()) { + if (auto accessor = + dyn_cast(use.getOwner())) { + if (accessor.canRewire(slot, info.usedIndices, usedSafelyWorklist)) { + info.accessors.push_back(accessor); + continue; + } + } + + // If it cannot be shown that the operation uses the slot safely, maybe it + // can be promoted out of using the slot? 
+ scheduleAsBlockingUse(use); + } + + SmallPtrSet visited; + while (!usedSafelyWorklist.empty()) { + MemorySlot mustBeUsedSafely = usedSafelyWorklist.pop_back_val(); + for (OpOperand &subslotUse : mustBeUsedSafely.ptr.getUses()) { + if (!visited.insert(&subslotUse).second) + continue; + Operation *subslotUser = subslotUse.getOwner(); + + if (auto memOp = dyn_cast(subslotUser)) + if (succeeded(memOp.ensureOnlySafeAccesses(mustBeUsedSafely, + usedSafelyWorklist))) + continue; + + // If it cannot be shown that the operation uses the slot safely, maybe it + // can be promoted out of using the slot? + scheduleAsBlockingUse(subslotUse); + } + } + + SetVector forwardSlice; + mlir::getForwardSlice(slot.ptr, &forwardSlice); + for (Operation *user : forwardSlice) { + // If the next operation has no blocking uses, everything is fine. + if (!info.userToBlockingUses.contains(user)) + continue; + + SmallPtrSet &blockingUses = info.userToBlockingUses[user]; + auto promotable = dyn_cast(user); + + // An operation that has blocking uses must be promoted. If it is not + // promotable, destructuring must fail. + if (!promotable) + return {}; + + SmallVector newBlockingUses; + // If the operation decides it cannot deal with removing the blocking uses, + // destructuring must fail. + if (!promotable.canUsesBeRemoved(blockingUses, newBlockingUses)) + return {}; + + // Then, register any new blocking uses for coming operations. + for (OpOperand *blockingUse : newBlockingUses) { + assert(llvm::is_contained(user->getResults(), blockingUse->get())); + + SmallPtrSetImpl &newUserBlockingUseSet = + info.userToBlockingUses.getOrInsertDefault(blockingUse->getOwner()); + newUserBlockingUseSet.insert(blockingUse); + } + } + + return info; +} + +/// Performs the destructuring of a destructible slot given associated +/// destructuring information. The provided slot will be destructured in +/// subslots as specified by its allocator. 
+static void destructureSlot(DestructurableMemorySlot &slot, + DestructurableAllocationOpInterface allocator, + RewriterBase &rewriter, + MemorySlotDestructuringInfo &info, + const SROAStatistics &statistics) { + RewriterBase::InsertionGuard guard(rewriter); + + rewriter.setInsertionPointToStart(slot.ptr.getParentBlock()); + DenseMap subslots = + allocator.destructure(slot, info.usedIndices, rewriter); + + if (statistics.slotsWithMemoryBenefit && + slot.elementPtrs.size() != info.usedIndices.size()) + (*statistics.slotsWithMemoryBenefit)++; + + if (statistics.maxSubelementAmount) + statistics.maxSubelementAmount->updateMax(slot.elementPtrs.size()); + + SetVector usersToRewire; + for (Operation *user : llvm::make_first_range(info.userToBlockingUses)) + usersToRewire.insert(user); + for (DestructurableAccessorOpInterface accessor : info.accessors) + usersToRewire.insert(accessor); + usersToRewire = mlir::topologicalSort(usersToRewire); + + llvm::SmallVector toErase; + for (Operation *toRewire : llvm::reverse(usersToRewire)) { + rewriter.setInsertionPointAfter(toRewire); + if (auto accessor = dyn_cast(toRewire)) { + if (accessor.rewire(slot, subslots, rewriter) == DeletionKind::Delete) + toErase.push_back(accessor); + continue; + } + + auto promotable = cast(toRewire); + if (promotable.removeBlockingUses(info.userToBlockingUses[promotable], + rewriter) == DeletionKind::Delete) + toErase.push_back(promotable); + } + + for (Operation *toEraseOp : toErase) + rewriter.eraseOp(toEraseOp); + + assert(slot.ptr.use_empty() && "after destructuring, the original slot " + "pointer should no longer be used"); + + LLVM_DEBUG(llvm::dbgs() << "[sroa] Destructured memory slot: " << slot.ptr + << "\n"); + + if (statistics.destructuredAmount) + (*statistics.destructuredAmount)++; + + allocator.handleDestructuringComplete(slot, rewriter); +} + +LogicalResult mlir::tryToDestructureMemorySlots( + ArrayRef allocators, + RewriterBase &rewriter, SROAStatistics statistics) { + bool 
destructuredAny = false; + + for (DestructurableAllocationOpInterface allocator : allocators) { + for (DestructurableMemorySlot slot : allocator.getDestructurableSlots()) { + std::optional info = + computeDestructuringInfo(slot); + if (!info) + continue; + + destructureSlot(slot, allocator, rewriter, *info, statistics); + destructuredAny = true; + } + } + + return success(destructuredAny); +} + +LogicalResult +SROAPattern::matchAndRewrite(DestructurableAllocationOpInterface allocator, + PatternRewriter &rewriter) const { + hasBoundedRewriteRecursion(); + return tryToDestructureMemorySlots({allocator}, rewriter, statistics); +} + +namespace { + +struct SROA : public impl::SROABase { + using impl::SROABase::SROABase; + + void runOnOperation() override { + Operation *scopeOp = getOperation(); + + SROAStatistics statistics{&destructuredAmount, &slotsWithMemoryBenefit, + &maxSubelementAmount}; + + RewritePatternSet rewritePatterns(&getContext()); + rewritePatterns.add(&getContext(), statistics); + FrozenRewritePatternSet frozen(std::move(rewritePatterns)); + + if (failed(applyPatternsAndFoldGreedily(scopeOp, frozen))) + signalPassFailure(); + } +}; + +} // namespace diff --git a/mlir/python/mlir/ir.py b/mlir/python/mlir/ir.py --- a/mlir/python/mlir/ir.py +++ b/mlir/python/mlir/ir.py @@ -27,7 +27,7 @@ @register_attribute_builder("I16Attr") -def _i32Attr(x, context): +def _i16Attr(x, context): return IntegerAttr.get(IntegerType.get_signless(16, context=context), x) @@ -41,6 +41,26 @@ return IntegerAttr.get(IntegerType.get_signless(64, context=context), x) +@register_attribute_builder("SI16Attr") +def _si16Attr(x, context): + return IntegerAttr.get(IntegerType.get_signed(16, context=context), x) + + +@register_attribute_builder("SI32Attr") +def _si32Attr(x, context): + return IntegerAttr.get(IntegerType.get_signed(32, context=context), x) + + +@register_attribute_builder("F32Attr") +def _f32Attr(x, context): + return FloatAttr.get_f32(x, context=context) + + 
+@register_attribute_builder("F64Attr") +def _f64Attr(x, context): + return FloatAttr.get_f64(x, context=context) + + @register_attribute_builder("StrAttr") def _stringAttr(x, context): return StringAttr.get(x, context=context) @@ -61,11 +81,26 @@ return ArrayAttr.get(x, context=context) +@register_attribute_builder("I32ArrayAttr") +def _i32ArrayAttr(x, context): + return ArrayAttr.get([_i32Attr(v, context) for v in x]) + + @register_attribute_builder("I64ArrayAttr") def _i64ArrayAttr(x, context): return ArrayAttr.get([_i64Attr(v, context) for v in x]) +@register_attribute_builder("F32ArrayAttr") +def _f32ArrayAttr(x, context): + return ArrayAttr.get([_f32Attr(v, context) for v in x]) + + +@register_attribute_builder("F64ArrayAttr") +def _f64ArrayAttr(x, context): + return ArrayAttr.get([_f64Attr(v, context) for v in x]) + + @register_attribute_builder("DenseI64ArrayAttr") def _denseI64ArrayAttr(x, context): return DenseI64ArrayAttr.get(x, context=context) diff --git a/mlir/test/Bytecode/invalid/invalid-structure.mlir b/mlir/test/Bytecode/invalid/invalid-structure.mlir --- a/mlir/test/Bytecode/invalid/invalid-structure.mlir +++ b/mlir/test/Bytecode/invalid/invalid-structure.mlir @@ -9,7 +9,7 @@ //===--------------------------------------------------------------------===// // RUN: not mlir-opt %S/invalid-structure-version.mlirbc 2>&1 | FileCheck %s --check-prefix=VERSION -// VERSION: bytecode version 127 is newer than the current version 2 +// VERSION: bytecode version 127 is newer than the current version 3 //===--------------------------------------------------------------------===// // Producer diff --git a/mlir/test/Bytecode/uselist_orders.mlir b/mlir/test/Bytecode/uselist_orders.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Bytecode/uselist_orders.mlir @@ -0,0 +1,63 @@ +// RUN: mlir-opt %s -split-input-file --test-verify-uselistorder -verify-diagnostics + +// COM: --test-verify-uselistorder will randomly shuffle the uselist of every +// value and do a 
roundtrip to bytecode. An error is returned if the +// uselist order are not preserved when doing a roundtrip to bytecode. The +// test needs to verify diagnostics to be functional. + +func.func @base_test(%arg0 : i32) -> i32 { + %0 = arith.constant 45 : i32 + %1 = arith.constant 46 : i32 + %2 = "test.addi"(%arg0, %arg0) : (i32, i32) -> i32 + %3 = "test.addi"(%2, %0) : (i32, i32) -> i32 + %4 = "test.addi"(%2, %1) : (i32, i32) -> i32 + %5 = "test.addi"(%3, %4) : (i32, i32) -> i32 + %6 = "test.addi"(%5, %4) : (i32, i32) -> i32 + %7 = "test.addi"(%6, %4) : (i32, i32) -> i32 + return %7 : i32 +} + +// ----- + +func.func @test_with_multiple_uses_in_same_op(%arg0 : i32) -> i32 { + %0 = arith.constant 45 : i32 + %1 = arith.constant 46 : i32 + %2 = "test.addi"(%arg0, %arg0) : (i32, i32) -> i32 + %3 = "test.addi"(%2, %0) : (i32, i32) -> i32 + %4 = "test.addi"(%2, %1) : (i32, i32) -> i32 + %5 = "test.addi"(%2, %2) : (i32, i32) -> i32 + %6 = "test.addi"(%3, %4) : (i32, i32) -> i32 + %7 = "test.addi"(%6, %5) : (i32, i32) -> i32 + %8 = "test.addi"(%7, %4) : (i32, i32) -> i32 + %9 = "test.addi"(%8, %4) : (i32, i32) -> i32 + return %9 : i32 +} + +// ----- + +func.func @test_with_multiple_block_arg_uses(%arg0 : i32) -> i32 { + %0 = arith.constant 45 : i32 + %1 = arith.constant 46 : i32 + %2 = "test.addi"(%arg0, %arg0) : (i32, i32) -> i32 + %3 = "test.addi"(%2, %arg0) : (i32, i32) -> i32 + %4 = "test.addi"(%2, %1) : (i32, i32) -> i32 + %5 = "test.addi"(%2, %2) : (i32, i32) -> i32 + %6 = "test.addi"(%3, %4) : (i32, i32) -> i32 + %7 = "test.addi"(%6, %5) : (i32, i32) -> i32 + %8 = "test.addi"(%7, %4) : (i32, i32) -> i32 + %9 = "test.addi"(%8, %4) : (i32, i32) -> i32 + return %9 : i32 +} + +// ----- + +// Test that use-lists in region with no dominance are preserved +test.graph_region { + %0 = "test.foo"(%1) : (i32) -> i32 + test.graph_region attributes {a} { + %a = "test.a"(%b) : (i32) -> i32 + %b = "test.b"(%2) : (i32) -> i32 + } + %1 = "test.bar"(%2) : (i32) -> i32 + %2 = 
"test.baz"() : () -> i32 +} diff --git a/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir --- a/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir +++ b/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir @@ -91,12 +91,15 @@ %j = arith.constant 16 : index gpu.subgroup_mma_store_matrix %arg0, %sg[%i,%j] {leadDimension= 32 : index, transpose} : !gpu.mma_matrix<16x16xf16, "COp">, memref<32x32xf16, 3> // CHECK: %[[INX:.*]] = llvm.mlir.constant(16 : index) : i64 - // CHECK: %{{.*}} = llvm.insertvalue %{{.*}}, %{{.*}}[{{.*}}, {{.*}}] + // CHECK: %{{.*}} = llvm.insertvalue %{{.*}}, %{{.*}}[{{.*}}, {{.*}}] + // CHECK: %{{.*}} = llvm.insertvalue %{{.*}}, %{{.*}}[{{.*}}, {{.*}}] + // CHECK: %{{.*}} = llvm.insertvalue %{{.*}}, %{{.*}}[{{.*}}, {{.*}}] + // CHECK: %[[MEMREF:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[{{.*}}, {{.*}}] // CHECK: %[[EL1:.*]] = llvm.extractvalue %[[D]][0] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: %[[EL2:.*]] = llvm.extractvalue %[[D]][1] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: %[[EL3:.*]] = llvm.extractvalue %[[D]][2] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: %[[EL4:.*]] = llvm.extractvalue %[[D]][3] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> - // CHECK: %[[BASE:.*]] = llvm.extractvalue %17[1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> + // CHECK: %[[BASE:.*]] = llvm.extractvalue %[[MEMREF]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> // CHECK: %[[LDM:.*]] = llvm.mlir.constant(32 : index) : i64 // CHECK: %[[LI:.*]] = llvm.mul %[[INX]], %[[LDM]] : i64 // CHECK: %[[LIJ:.*]] = llvm.add %[[LI]], %[[INX]] : i64 @@ -107,12 +110,15 @@ // CHECK: llvm.return // CHECK32: %[[INX:.*]] = llvm.mlir.constant(16 : index) : i32 - // CHECK32: %{{.*}} = llvm.insertvalue %{{.*}}, 
%{{.*}}[{{.*}}, {{.*}}] + // CHECK32: %{{.*}} = llvm.insertvalue %{{.*}}, %{{.*}}[{{.*}}, {{.*}}] + // CHECK32: %{{.*}} = llvm.insertvalue %{{.*}}, %{{.*}}[{{.*}}, {{.*}}] + // CHECK32: %{{.*}} = llvm.insertvalue %{{.*}}, %{{.*}}[{{.*}}, {{.*}}] + // CHECK32: %[[MEMREF:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[{{.*}}, {{.*}}] // CHECK32: %[[EL1:.*]] = llvm.extractvalue %[[D]][0] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK32: %[[EL2:.*]] = llvm.extractvalue %[[D]][1] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK32: %[[EL3:.*]] = llvm.extractvalue %[[D]][2] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK32: %[[EL4:.*]] = llvm.extractvalue %[[D]][3] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> - // CHECK32: %[[BASE:.*]] = llvm.extractvalue %17[1] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<2 x i32>, array<2 x i32>)> + // CHECK32: %[[BASE:.*]] = llvm.extractvalue %[[MEMREF]][1] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<2 x i32>, array<2 x i32>)> // CHECK32: %[[LDM:.*]] = llvm.mlir.constant(32 : index) : i32 // CHECK32: %[[LI:.*]] = llvm.mul %[[INX]], %[[LDM]] : i32 // CHECK32: %[[LIJ:.*]] = llvm.add %[[LI]], %[[INX]] : i32 diff --git a/mlir/test/Conversion/MemRefToLLVM/convert-dynamic-memref-ops.mlir b/mlir/test/Conversion/MemRefToLLVM/convert-dynamic-memref-ops.mlir --- a/mlir/test/Conversion/MemRefToLLVM/convert-dynamic-memref-ops.mlir +++ b/mlir/test/Conversion/MemRefToLLVM/convert-dynamic-memref-ops.mlir @@ -86,10 +86,7 @@ // CHECK-DAG: %[[N:.*]] = builtin.unrealized_conversion_cast %[[Narg]] // CHECK-NEXT: %[[st1:.*]] = llvm.mlir.constant(1 : index) : i64 // CHECK-NEXT: %[[num_elems:.*]] = llvm.mul %[[N]], %[[M]] : i64 -// CHECK-NEXT: %[[null:.*]] = llvm.mlir.null : !llvm.ptr -// CHECK-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[num_elems]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32 -// CHECK-NEXT: 
%[[sz_bytes:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr to i64 -// CHECK-NEXT: %[[allocated:.*]] = llvm.alloca %[[sz_bytes]] x f32 : (i64) -> !llvm.ptr +// CHECK-NEXT: %[[allocated:.*]] = llvm.alloca %[[num_elems]] x f32 : (i64) -> !llvm.ptr // CHECK-NEXT: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK-NEXT: llvm.insertvalue %[[allocated]], %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK-NEXT: llvm.insertvalue %[[allocated]], %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> diff --git a/mlir/test/Conversion/MemRefToLLVM/convert-static-memref-ops.mlir b/mlir/test/Conversion/MemRefToLLVM/convert-static-memref-ops.mlir --- a/mlir/test/Conversion/MemRefToLLVM/convert-static-memref-ops.mlir +++ b/mlir/test/Conversion/MemRefToLLVM/convert-static-memref-ops.mlir @@ -79,10 +79,7 @@ // CHECK: %[[sz2:.*]] = llvm.mlir.constant(18 : index) : i64 // CHECK: %[[st2:.*]] = llvm.mlir.constant(1 : index) : i64 // CHECK: %[[num_elems:.*]] = llvm.mlir.constant(576 : index) : i64 -// CHECK: %[[null:.*]] = llvm.mlir.null : !llvm.ptr -// CHECK: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[num_elems]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32 -// CHECK: %[[size_bytes:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr to i64 -// CHECK: %[[allocated:.*]] = llvm.alloca %[[size_bytes]] x f32 : (i64) -> !llvm.ptr +// CHECK: %[[allocated:.*]] = llvm.alloca %[[num_elems]] x f32 : (i64) -> !llvm.ptr %0 = memref.alloca() : memref<32x18xf32> // Test with explicitly specified alignment. 
llvm.alloca takes care of the diff --git a/mlir/test/Dialect/LLVMIR/inlining.mlir b/mlir/test/Dialect/LLVMIR/inlining.mlir --- a/mlir/test/Dialect/LLVMIR/inlining.mlir +++ b/mlir/test/Dialect/LLVMIR/inlining.mlir @@ -12,15 +12,16 @@ llvm.intr.dbg.value #variable = %0 : i32 llvm.intr.dbg.declare #variableAddr = %ptr : !llvm.ptr %byte = llvm.mlir.constant(43 : i8) : i8 - %volatile = llvm.mlir.constant(1 : i1) : i1 - "llvm.intr.memset"(%ptr, %byte, %0, %volatile) : (!llvm.ptr, i8, i32, i1) -> () - "llvm.intr.memmove"(%ptr, %ptr, %0, %volatile) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () - "llvm.intr.memcpy"(%ptr, %ptr, %0, %volatile) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () + %true = llvm.mlir.constant(1 : i1) : i1 + "llvm.intr.memset"(%ptr, %byte, %0, %true) : (!llvm.ptr, i8, i32, i1) -> () + "llvm.intr.memmove"(%ptr, %ptr, %0, %true) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () + "llvm.intr.memcpy"(%ptr, %ptr, %0, %true) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () + "llvm.intr.assume"(%true) : (i1) -> () llvm.fence release %2 = llvm.atomicrmw add %ptr, %0 monotonic : !llvm.ptr, i32 %3 = llvm.cmpxchg %ptr, %0, %1 acq_rel monotonic : !llvm.ptr, i32 llvm.inline_asm has_side_effects "foo", "bar" : () -> () - llvm.cond_br %volatile, ^bb1, ^bb2 + llvm.cond_br %true, ^bb1, ^bb2 ^bb1: llvm.unreachable ^bb2: @@ -39,6 +40,7 @@ // CHECK: "llvm.intr.memset"(%[[PTR]] // CHECK: "llvm.intr.memmove"(%[[PTR]], %[[PTR]] // CHECK: "llvm.intr.memcpy"(%[[PTR]], %[[PTR]] +// CHECK: "llvm.intr.assume" // CHECK: llvm.fence release // CHECK: llvm.atomicrmw add %[[PTR]], %[[CST]] monotonic // CHECK: llvm.cmpxchg %[[PTR]], %[[CST]], %[[RES]] acq_rel monotonic @@ -564,7 +566,7 @@ // ----- -llvm.func @ignored_attrs(%ptr : !llvm.ptr { llvm.inreg, llvm.nocapture, llvm.nofree, llvm.preallocated = i32, llvm.returned, llvm.alignstack = 32 : i64, llvm.writeonly, llvm.noundef, llvm.nonnull }, %x : i32 { llvm.zeroext }) -> (!llvm.ptr { llvm.noundef, llvm.inreg, llvm.nonnull }) { +llvm.func @ignored_attrs(%ptr : 
!llvm.ptr { llvm.inreg, llvm.noalias, llvm.nocapture, llvm.nofree, llvm.preallocated = i32, llvm.returned, llvm.alignstack = 32 : i64, llvm.writeonly, llvm.noundef, llvm.nonnull }, %x : i32 { llvm.zeroext }) -> (!llvm.ptr { llvm.noundef, llvm.inreg, llvm.nonnull }) { llvm.return %ptr : !llvm.ptr } @@ -578,7 +580,7 @@ // ----- -llvm.func @disallowed_arg_attr(%ptr : !llvm.ptr { llvm.noalias }) { +llvm.func @disallowed_arg_attr(%ptr : !llvm.ptr { llvm.inalloca = i64 }) { llvm.return } @@ -588,16 +590,3 @@ llvm.call @disallowed_arg_attr(%ptr) : (!llvm.ptr) -> () llvm.return } - -// ----- - -llvm.func @disallowed_res_attr(%ptr : !llvm.ptr) -> (!llvm.ptr { llvm.noalias }) { - llvm.return %ptr : !llvm.ptr -} - -// CHECK-LABEL: @test_disallow_res_attr -// CHECK-NEXT: llvm.call -llvm.func @test_disallow_res_attr(%ptr : !llvm.ptr) { - llvm.call @disallowed_res_attr(%ptr) : (!llvm.ptr) -> (!llvm.ptr) - llvm.return -} diff --git a/mlir/test/Dialect/LLVMIR/sroa-statistics.mlir b/mlir/test/Dialect/LLVMIR/sroa-statistics.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Dialect/LLVMIR/sroa-statistics.mlir @@ -0,0 +1,61 @@ +// RUN: mlir-opt %s --pass-pipeline="builtin.module(llvm.func(sroa))" --split-input-file --mlir-pass-statistics 2>&1 >/dev/null | FileCheck %s + +// CHECK: SROA +// CHECK-NEXT: (S) 1 destructured slots +// CHECK-NEXT: (S) 2 max subelement number +// CHECK-NEXT: (S) 1 slots with memory benefit +llvm.func @basic() -> i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, i32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr + %2 = llvm.getelementptr inbounds %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, i32)> + %3 = llvm.load %2 : !llvm.ptr -> i32 + llvm.return %3 : i32 +} + +// ----- + +// CHECK: SROA +// CHECK-NEXT: (S) 1 destructured slots +// CHECK-NEXT: (S) 2 max subelement number +// CHECK-NEXT: (S) 0 slots with memory benefit +llvm.func @basic_no_memory_benefit() -> i32 { + %0 = 
llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, i32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr + %2 = llvm.getelementptr inbounds %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, i32)> + %3 = llvm.getelementptr inbounds %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, i32)> + %4 = llvm.load %2 : !llvm.ptr -> i32 + %5 = llvm.load %3 : !llvm.ptr -> i32 + %6 = llvm.add %4, %5 : i32 + llvm.return %6 : i32 +} + +// ----- + +// CHECK: SROA +// CHECK-NEXT: (S) 1 destructured slots +// CHECK-NEXT: (S) 10 max subelement number +// CHECK-NEXT: (S) 1 slots with memory benefit +llvm.func @basic_array() -> i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr + %2 = llvm.getelementptr inbounds %1[0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32> + %3 = llvm.load %2 : !llvm.ptr -> i32 + llvm.return %3 : i32 +} + +// ----- + +// SROA is applied repeatedly here, peeling off layers of aggregates one after +// the other, four times. 
+ +// CHECK: SROA +// CHECK-NEXT: (S) 4 destructured slots +// CHECK-NEXT: (S) 10 max subelement number +// CHECK-NEXT: (S) 4 slots with memory benefit +llvm.func @multi_level_direct() -> i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, f64, struct<"bar", (i8, array<10 x array<10 x i32>>, i8)>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr + %2 = llvm.getelementptr inbounds %1[0, 2, 1, 5, 8] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, f64, struct<"bar", (i8, array<10 x array<10 x i32>>, i8)>)> + %3 = llvm.load %2 : !llvm.ptr -> i32 + llvm.return %3 : i32 +} diff --git a/mlir/test/Dialect/LLVMIR/sroa.mlir b/mlir/test/Dialect/LLVMIR/sroa.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Dialect/LLVMIR/sroa.mlir @@ -0,0 +1,211 @@ +// RUN: mlir-opt %s --pass-pipeline="builtin.module(llvm.func(sroa))" --split-input-file | FileCheck %s + +// CHECK-LABEL: llvm.func @basic_struct +llvm.func @basic_struct() -> i32 { + // CHECK: %[[SIZE:.*]] = llvm.mlir.constant(1 : i32) + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[SIZE]] x i32 + %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, f64, i32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr + %2 = llvm.getelementptr inbounds %1[0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, f64, i32)> + // CHECK: %[[RES:.*]] = llvm.load %[[ALLOCA]] + %3 = llvm.load %2 : !llvm.ptr -> i32 + // CHECK: llvm.return %[[RES]] : i32 + llvm.return %3 : i32 +} + +// ----- + +// CHECK-LABEL: llvm.func @basic_array +llvm.func @basic_array() -> i32 { + // CHECK: %[[SIZE:.*]] = llvm.mlir.constant(1 : i32) + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[SIZE]] x i32 + %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr + %2 = llvm.getelementptr inbounds %1[0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32> + // CHECK: %[[RES:.*]] = llvm.load %[[ALLOCA]] + %3 = llvm.load %2 : 
!llvm.ptr -> i32 + // CHECK: llvm.return %[[RES]] : i32 + llvm.return %3 : i32 +} + +// ----- + +// CHECK-LABEL: llvm.func @multi_level_direct +llvm.func @multi_level_direct() -> i32 { + // CHECK: %[[SIZE:.*]] = llvm.mlir.constant(1 : i32) + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[SIZE]] x i32 + %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, f64, struct<"bar", (i8, array<10 x array<10 x i32>>, i8)>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr + %2 = llvm.getelementptr inbounds %1[0, 2, 1, 5, 8] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, f64, struct<"bar", (i8, array<10 x array<10 x i32>>, i8)>)> + // CHECK: %[[RES:.*]] = llvm.load %[[ALLOCA]] + %3 = llvm.load %2 : !llvm.ptr -> i32 + // CHECK: llvm.return %[[RES]] : i32 + llvm.return %3 : i32 +} + +// ----- + +// The first application of SROA would generate a GEP with indices [0, 0]. This +// test ensures this GEP is not eliminated during the first application. Even +// though doing it would be correct, it would prevent the second application +// of SROA from eliminating the array. GEPs should be eliminated only when they are +// truly trivial (with indices [0]). 
+ +// CHECK-LABEL: llvm.func @multi_level_direct_two_applications +llvm.func @multi_level_direct_two_applications() -> i32 { + // CHECK: %[[SIZE:.*]] = llvm.mlir.constant(1 : i32) + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[SIZE]] x i32 + %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, f64, array<10 x i32>, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr + %2 = llvm.getelementptr inbounds %1[0, 2, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, f64, array<10 x i32>, i8)> + // CHECK: %[[RES:.*]] = llvm.load %[[ALLOCA]] + %3 = llvm.load %2 : !llvm.ptr -> i32 + // CHECK: llvm.return %[[RES]] : i32 + llvm.return %3 : i32 +} + +// ----- + +// CHECK-LABEL: llvm.func @multi_level_indirect +llvm.func @multi_level_indirect() -> i32 { + // CHECK: %[[SIZE:.*]] = llvm.mlir.constant(1 : i32) + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[SIZE]] x i32 + %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, f64, struct<"bar", (i8, array<10 x array<10 x i32>>, i8)>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr + %2 = llvm.getelementptr inbounds %1[0, 2, 1, 5] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, f64, struct<"bar", (i8, array<10 x array<10 x i32>>, i8)>)> + %3 = llvm.getelementptr inbounds %2[0, 8] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32> + // CHECK: %[[RES:.*]] = llvm.load %[[ALLOCA]] + %4 = llvm.load %3 : !llvm.ptr -> i32 + // CHECK: llvm.return %[[RES]] : i32 + llvm.return %4 : i32 +} + +// ----- + +// CHECK-LABEL: llvm.func @resolve_alias +// CHECK-SAME: (%[[ARG:.*]]: i32) +llvm.func @resolve_alias(%arg: i32) -> i32 { + // CHECK: %[[SIZE:.*]] = llvm.mlir.constant(1 : i32) + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[SIZE]] x i32 + %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, f64, i32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr + %2 = llvm.getelementptr %1[0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, f64, i32)> + %3 
= llvm.getelementptr inbounds %1[0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, f64, i32)> + // CHECK: llvm.store %[[ARG]], %[[ALLOCA]] + llvm.store %arg, %2 : i32, !llvm.ptr + // CHECK: %[[RES:.*]] = llvm.load %[[ALLOCA]] + %4 = llvm.load %3 : !llvm.ptr -> i32 + // CHECK: llvm.return %[[RES]] : i32 + llvm.return %4 : i32 +} + +// ----- + +// CHECK-LABEL: llvm.func @no_non_single_support +llvm.func @no_non_single_support() -> i32 { + // CHECK: %[[SIZE:.*]] = llvm.mlir.constant + %0 = llvm.mlir.constant(2 : i32) : i32 + // CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[SIZE]] x !llvm.struct<"foo", (i32, f64, i32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr + %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, f64, i32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr + // CHECK-NOT: = llvm.alloca + %2 = llvm.getelementptr inbounds %1[0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, f64, i32)> + %3 = llvm.load %2 : !llvm.ptr -> i32 + llvm.return %3 : i32 +} + +// ----- + +// CHECK-LABEL: llvm.func @no_pointer_indexing +llvm.func @no_pointer_indexing() -> i32 { + // CHECK: %[[SIZE:.*]] = llvm.mlir.constant(1 : i32) + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[SIZE]] x !llvm.struct<"foo", (i32, f64, i32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr + %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, f64, i32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr + // CHECK-NOT: = llvm.alloca + %2 = llvm.getelementptr %1[1, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, f64, i32)> + %3 = llvm.load %2 : !llvm.ptr -> i32 + llvm.return %3 : i32 +} + +// ----- + +// CHECK-LABEL: llvm.func @no_direct_use +llvm.func @no_direct_use() -> i32 { + // CHECK: %[[SIZE:.*]] = llvm.mlir.constant(1 : i32) + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[SIZE]] x !llvm.struct<"foo", (i32, f64, i32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr + %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, f64, i32)> 
{alignment = 8 : i64} : (i32) -> !llvm.ptr + // CHECK-NOT: = llvm.alloca + %2 = llvm.getelementptr %1[0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, f64, i32)> + %3 = llvm.load %2 : !llvm.ptr -> i32 + llvm.call @use(%1) : (!llvm.ptr) -> () + llvm.return %3 : i32 +} + +llvm.func @use(!llvm.ptr) + +// ----- + +// CHECK-LABEL: llvm.func @direct_promotable_use_is_fine +llvm.func @direct_promotable_use_is_fine() -> i32 { + // CHECK: %[[SIZE:.*]] = llvm.mlir.constant(1 : i32) + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[SIZE]] x i32 + %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, f64, i32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr + %2 = llvm.getelementptr %1[0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, f64, i32)> + // CHECK: %[[RES:.*]] = llvm.load %[[ALLOCA]] + %3 = llvm.load %2 : !llvm.ptr -> i32 + // This is a direct use of the slot but it can be removed because it implements PromotableOpInterface. + llvm.intr.lifetime.start 2, %1 : !llvm.ptr + // CHECK: llvm.return %[[RES]] : i32 + llvm.return %3 : i32 +} + +// ----- + +// CHECK-LABEL: llvm.func @direct_promotable_use_is_fine_on_accessor +llvm.func @direct_promotable_use_is_fine_on_accessor() -> i32 { + // CHECK: %[[SIZE:.*]] = llvm.mlir.constant(1 : i32) + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[SIZE]] x i32 + %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, f64, i32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr + %2 = llvm.getelementptr %1[0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, f64, i32)> + // CHECK: %[[RES:.*]] = llvm.load %[[ALLOCA]] + %3 = llvm.load %2 : !llvm.ptr -> i32 + // This does not provide side-effect info but it can be removed because it implements PromotableOpInterface. 
+ llvm.intr.lifetime.start 2, %2 : !llvm.ptr + // CHECK: llvm.return %[[RES]] : i32 + llvm.return %3 : i32 +} + +// ----- + +// CHECK-LABEL: llvm.func @no_dynamic_indexing +// CHECK-SAME: (%[[ARG:.*]]: i32) +llvm.func @no_dynamic_indexing(%arg: i32) -> i32 { + // CHECK: %[[SIZE:.*]] = llvm.mlir.constant(1 : i32) + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[SIZE]] x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr + %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr + // CHECK-NOT: = llvm.alloca + // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA]][0, %[[ARG]]] + %2 = llvm.getelementptr %1[0, %arg] : (!llvm.ptr, i32) -> !llvm.ptr, !llvm.array<10 x i32> + // CHECK: %[[RES:.*]] = llvm.load %[[GEP]] + %3 = llvm.load %2 : !llvm.ptr -> i32 + // CHECK: llvm.return %[[RES]] : i32 + llvm.return %3 : i32 +} + +// ----- + +// CHECK-LABEL: llvm.func @no_typed_pointers +llvm.func @no_typed_pointers() -> i32 { + // CHECK: %[[SIZE:.*]] = llvm.mlir.constant(1 : i32) + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[SIZE]] x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr> + %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr> + // CHECK-NOT: = llvm.alloca + %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr>) -> !llvm.ptr + %3 = llvm.load %2 : !llvm.ptr + llvm.return %3 : i32 +} diff --git a/mlir/test/Dialect/OpenACC/ops.mlir b/mlir/test/Dialect/OpenACC/ops.mlir --- a/mlir/test/Dialect/OpenACC/ops.mlir +++ b/mlir/test/Dialect/OpenACC/ops.mlir @@ -114,6 +114,16 @@ // ----- +acc.private.recipe @privatization_memref_10_f32 : memref<10xf32> init { +^bb0(%arg0: memref<10xf32>): + %0 = memref.alloc() : memref<10xf32> + acc.yield %0 : memref<10xf32> +} destroy { +^bb0(%arg0: memref<10xf32>): + memref.dealloc %arg0 : memref<10xf32> + acc.terminator +} + func.func @compute3(%a: memref<10x10xf32>, %b: 
memref<10x10xf32>, %c: memref<10xf32>, %d: memref<10xf32>) -> memref<10xf32> { %lb = arith.constant 0 : index %st = arith.constant 1 : index @@ -126,7 +136,7 @@ %pc = acc.present varPtr(%c : memref<10xf32>) -> memref<10xf32> %pd = acc.present varPtr(%d : memref<10xf32>) -> memref<10xf32> acc.data dataOperands(%pa, %pb, %pc, %pd: memref<10x10xf32>, memref<10x10xf32>, memref<10xf32>, memref<10xf32>) { - acc.parallel num_gangs(%numGangs: i64) num_workers(%numWorkers: i64) private(%c : memref<10xf32>) { + acc.parallel num_gangs(%numGangs: i64) num_workers(%numWorkers: i64) private(@privatization_memref_10_f32 -> %c : memref<10xf32>) { acc.loop gang { scf.for %x = %lb to %c10 step %st { acc.loop worker { @@ -168,7 +178,7 @@ // CHECK-NEXT: [[NUMGANG:%.*]] = arith.constant 10 : i64 // CHECK-NEXT: [[NUMWORKERS:%.*]] = arith.constant 10 : i64 // CHECK: acc.data dataOperands(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<10x10xf32>, memref<10x10xf32>, memref<10xf32>, memref<10xf32>) { -// CHECK-NEXT: acc.parallel num_gangs([[NUMGANG]] : i64) num_workers([[NUMWORKERS]] : i64) private([[ARG2]] : memref<10xf32>) { +// CHECK-NEXT: acc.parallel num_gangs([[NUMGANG]] : i64) num_workers([[NUMWORKERS]] : i64) private(@privatization_memref_10_f32 -> [[ARG2]] : memref<10xf32>) { // CHECK-NEXT: acc.loop gang { // CHECK-NEXT: scf.for %{{.*}} = [[C0]] to [[C10]] step [[C1]] { // CHECK-NEXT: acc.loop worker { @@ -358,6 +368,26 @@ // ----- +acc.private.recipe @privatization_memref_10_f32 : memref<10xf32> init { +^bb0(%arg0: memref<10xf32>): + %0 = memref.alloc() : memref<10xf32> + acc.yield %0 : memref<10xf32> +} destroy { +^bb0(%arg0: memref<10xf32>): + memref.dealloc %arg0 : memref<10xf32> + acc.terminator +} + +acc.private.recipe @privatization_memref_10_10_f32 : memref<10x10xf32> init { +^bb0(%arg0: memref<10x10xf32>): + %0 = memref.alloc() : memref<10x10xf32> + acc.yield %0 : memref<10x10xf32> +} destroy { +^bb0(%arg0: memref<10x10xf32>): + memref.dealloc %arg0 : memref<10x10xf32> + 
acc.terminator +} + func.func @testparallelop(%a: memref<10xf32>, %b: memref<10xf32>, %c: memref<10x10xf32>) -> () { %i64value = arith.constant 1 : i64 %i32value = arith.constant 1 : i32 @@ -394,7 +424,7 @@ } acc.parallel vector_length(%idxValue: index) { } - acc.parallel private(%a, %c : memref<10xf32>, memref<10x10xf32>) firstprivate(%b: memref<10xf32>) { + acc.parallel private(@privatization_memref_10_f32 -> %a : memref<10xf32>, @privatization_memref_10_10_f32 -> %c : memref<10x10xf32>) firstprivate(%b: memref<10xf32>) { } acc.parallel { } attributes {defaultAttr = #acc} @@ -445,7 +475,7 @@ // CHECK-NEXT: } // CHECK: acc.parallel vector_length([[IDXVALUE]] : index) { // CHECK-NEXT: } -// CHECK: acc.parallel firstprivate([[ARGB]] : memref<10xf32>) private([[ARGA]], [[ARGC]] : memref<10xf32>, memref<10x10xf32>) { +// CHECK: acc.parallel firstprivate([[ARGB]] : memref<10xf32>) private(@privatization_memref_10_f32 -> [[ARGA]] : memref<10xf32>, @privatization_memref_10_10_f32 -> [[ARGC]] : memref<10x10xf32>) { // CHECK-NEXT: } // CHECK: acc.parallel { // CHECK-NEXT: } attributes {defaultAttr = #acc} @@ -460,7 +490,25 @@ // ----- -// ----- +acc.private.recipe @privatization_memref_10_f32 : memref<10xf32> init { +^bb0(%arg0: memref<10xf32>): + %0 = memref.alloc() : memref<10xf32> + acc.yield %0 : memref<10xf32> +} destroy { +^bb0(%arg0: memref<10xf32>): + memref.dealloc %arg0 : memref<10xf32> + acc.terminator +} + +acc.private.recipe @privatization_memref_10_10_f32 : memref<10x10xf32> init { +^bb0(%arg0: memref<10x10xf32>): + %0 = memref.alloc() : memref<10x10xf32> + acc.yield %0 : memref<10x10xf32> +} destroy { +^bb0(%arg0: memref<10x10xf32>): + memref.dealloc %arg0 : memref<10x10xf32> + acc.terminator +} func.func @testserialop(%a: memref<10xf32>, %b: memref<10xf32>, %c: memref<10x10xf32>) -> () { %i64value = arith.constant 1 : i64 @@ -480,7 +528,7 @@ } acc.serial wait(%i64value, %i32value, %idxValue : i64, i32, index) { } - acc.serial private(%a, %c : 
memref<10xf32>, memref<10x10xf32>) firstprivate(%b: memref<10xf32>) { + acc.serial private(@privatization_memref_10_f32 -> %a : memref<10xf32>, @privatization_memref_10_10_f32 -> %c : memref<10x10xf32>) firstprivate(%b: memref<10xf32>) { } acc.serial { } attributes {defaultAttr = #acc} @@ -516,7 +564,7 @@ // CHECK-NEXT: } // CHECK: acc.serial wait([[I64VALUE]], [[I32VALUE]], [[IDXVALUE]] : i64, i32, index) { // CHECK-NEXT: } -// CHECK: acc.serial firstprivate([[ARGB]] : memref<10xf32>) private([[ARGA]], [[ARGC]] : memref<10xf32>, memref<10x10xf32>) { +// CHECK: acc.serial firstprivate([[ARGB]] : memref<10xf32>) private(@privatization_memref_10_f32 -> [[ARGA]] : memref<10xf32>, @privatization_memref_10_10_f32 -> [[ARGC]] : memref<10x10xf32>) { // CHECK-NEXT: } // CHECK: acc.serial { // CHECK-NEXT: } attributes {defaultAttr = #acc} diff --git a/mlir/test/Dialect/OpenMP/attr.mlir b/mlir/test/Dialect/OpenMP/attr.mlir --- a/mlir/test/Dialect/OpenMP/attr.mlir +++ b/mlir/test/Dialect/OpenMP/attr.mlir @@ -56,3 +56,83 @@ // CHECK: module attributes {omp.version = #omp.version} { module attributes {omp.version = #omp.version} {} + +// ---- + +// CHECK-LABEL: func @omp_decl_tar_host_to +// CHECK-SAME: {{.*}} attributes {omp.declare_target = #omp.declaretarget} { +func.func @omp_decl_tar_host_to() -> () attributes {omp.declare_target = #omp.declaretarget} { + return +} + +// CHECK-LABEL: func @omp_decl_tar_host_link +// CHECK-SAME: {{.*}} attributes {omp.declare_target = #omp.declaretarget} { +func.func @omp_decl_tar_host_link() -> () attributes {omp.declare_target = #omp.declaretarget} { + return +} + +// CHECK-LABEL: func @omp_decl_tar_nohost_to +// CHECK-SAME: {{.*}} attributes {omp.declare_target = #omp.declaretarget} { +func.func @omp_decl_tar_nohost_to() -> () attributes {omp.declare_target = #omp.declaretarget} { + return +} + +// CHECK-LABEL: func @omp_decl_tar_nohost_link +// CHECK-SAME: {{.*}} attributes {omp.declare_target = #omp.declaretarget} { +func.func 
@omp_decl_tar_nohost_link() -> () attributes {omp.declare_target = #omp.declaretarget} { + return +} + +// CHECK-LABEL: func @omp_decl_tar_any_to +// CHECK-SAME: {{.*}} attributes {omp.declare_target = #omp.declaretarget} { +func.func @omp_decl_tar_any_to() -> () attributes {omp.declare_target = #omp.declaretarget} { + return +} + +// CHECK-LABEL: func @omp_decl_tar_any_link +// CHECK-SAME: {{.*}} attributes {omp.declare_target = #omp.declaretarget} { +func.func @omp_decl_tar_any_link() -> () attributes {omp.declare_target = #omp.declaretarget} { + return +} + +// CHECK-LABEL: global external @omp_decl_tar_data_host_to +// CHECK-SAME: {{.*}} {{{.*}}omp.declare_target = #omp.declaretarget} +llvm.mlir.global external @omp_decl_tar_data_host_to() {omp.declare_target = #omp.declaretarget} : i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + llvm.return %0 : i32 +} + +// CHECK-LABEL: global external @omp_decl_tar_data_host_link +// CHECK-SAME: {{.*}} {{{.*}}omp.declare_target = #omp.declaretarget} +llvm.mlir.global external @omp_decl_tar_data_host_link() {omp.declare_target = #omp.declaretarget} : i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + llvm.return %0 : i32 +} + +// CHECK-LABEL: global external @omp_decl_tar_data_nohost_to +// CHECK-SAME: {{.*}} {{{.*}}omp.declare_target = #omp.declaretarget} +llvm.mlir.global external @omp_decl_tar_data_nohost_to() {omp.declare_target = #omp.declaretarget} : i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + llvm.return %0 : i32 +} + +// CHECK-LABEL: global external @omp_decl_tar_data_nohost_link +// CHECK-SAME: {{.*}} {{{.*}}omp.declare_target = #omp.declaretarget} +llvm.mlir.global external @omp_decl_tar_data_nohost_link() {omp.declare_target = #omp.declaretarget} : i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + llvm.return %0 : i32 +} + +// CHECK-LABEL: global external @omp_decl_tar_data_any_to +// CHECK-SAME: {{.*}} {{{.*}}omp.declare_target = #omp.declaretarget} +llvm.mlir.global external @omp_decl_tar_data_any_to() 
{omp.declare_target = #omp.declaretarget} : i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + llvm.return %0 : i32 +} + +// CHECK-LABEL: global external @omp_decl_tar_data_any_link +// CHECK-SAME: {{.*}} {{{.*}}omp.declare_target = #omp.declaretarget} +llvm.mlir.global external @omp_decl_tar_data_any_link() {omp.declare_target = #omp.declaretarget} : i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + llvm.return %0 : i32 +} diff --git a/mlir/test/Dialect/SCF/transform-op-coalesce.mlir b/mlir/test/Dialect/SCF/transform-op-coalesce.mlir --- a/mlir/test/Dialect/SCF/transform-op-coalesce.mlir +++ b/mlir/test/Dialect/SCF/transform-op-coalesce.mlir @@ -23,9 +23,9 @@ } transform.sequence failures(propagate) { -^bb1(%arg1: !pdl.operation): - %0 = transform.structured.match ops{["scf.for"]} attributes {coalesce} in %arg1 : (!pdl.operation) -> !pdl.operation - %1 = transform.cast %0 : !pdl.operation to !transform.op<"scf.for"> +^bb1(%arg1: !transform.any_op): + %0 = transform.structured.match ops{["scf.for"]} attributes {coalesce} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.cast %0 : !transform.any_op to !transform.op<"scf.for"> %2 = transform.loop.coalesce %1: (!transform.op<"scf.for">) -> (!transform.op<"scf.for">) } @@ -49,9 +49,9 @@ } transform.sequence failures(propagate) { -^bb1(%arg1: !pdl.operation): - %0 = transform.structured.match ops{["affine.for"]} attributes {coalesce} in %arg1 : (!pdl.operation) -> !pdl.operation - %1 = transform.cast %0 : !pdl.operation to !transform.op<"affine.for"> +^bb1(%arg1: !transform.any_op): + %0 = transform.structured.match ops{["affine.for"]} attributes {coalesce} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.cast %0 : !transform.any_op to !transform.op<"affine.for"> %2 = transform.loop.coalesce %1 : (!transform.op<"affine.for">) -> (!transform.op<"affine.for">) } @@ -84,9 +84,9 @@ } transform.sequence failures(propagate) { -^bb1(%arg1: !pdl.operation): - %0 = transform.structured.match 
ops{["scf.for"]} attributes {coalesce} in %arg1 : (!pdl.operation) -> !pdl.operation - %1 = transform.cast %0 : !pdl.operation to !transform.op<"scf.for"> +^bb1(%arg1: !transform.any_op): + %0 = transform.structured.match ops{["scf.for"]} attributes {coalesce} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.cast %0 : !transform.any_op to !transform.op<"scf.for"> %2 = transform.loop.coalesce %1 : (!transform.op<"scf.for">) -> (!transform.op<"scf.for">) transform.loop.unroll %2 {factor = 3} : !transform.op<"scf.for"> } diff --git a/mlir/test/Dialect/SCF/transform-ops-invalid.mlir b/mlir/test/Dialect/SCF/transform-ops-invalid.mlir --- a/mlir/test/Dialect/SCF/transform-ops-invalid.mlir +++ b/mlir/test/Dialect/SCF/transform-ops-invalid.mlir @@ -11,9 +11,9 @@ } transform.sequence failures(propagate) { -^bb1(%arg1: !pdl.operation): - %0 = transform.structured.match ops{["affine.for"]} attributes {coalesce} in %arg1 : (!pdl.operation) -> !pdl.operation - %1 = transform.cast %0 : !pdl.operation to !transform.op<"affine.for"> +^bb1(%arg1: !transform.any_op): + %0 = transform.structured.match ops{["affine.for"]} attributes {coalesce} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.cast %0 : !transform.any_op to !transform.op<"affine.for"> // expected-error @below {{failed to coalesce}} %2 = transform.loop.coalesce %1: (!transform.op<"affine.for">) -> (!transform.op<"affine.for">) } @@ -28,9 +28,9 @@ } transform.sequence failures(propagate) { -^bb1(%arg1: !pdl.operation): - %0 = transform.structured.match ops{["arith.addi"]} in %arg1 : (!pdl.operation) -> !pdl.operation - %1 = transform.loop.get_parent_for %0 { affine = true } : (!pdl.operation) -> !transform.op<"affine.for"> +^bb1(%arg1: !transform.any_op): + %0 = transform.structured.match ops{["arith.addi"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.loop.get_parent_for %0 { affine = true } : (!transform.any_op) -> !transform.op<"affine.for"> // 
expected-error @below {{failed to unroll}} transform.loop.unroll %1 { factor = 8 } : !transform.op<"affine.for"> } diff --git a/mlir/test/Dialect/SCF/transform-ops.mlir b/mlir/test/Dialect/SCF/transform-ops.mlir --- a/mlir/test/Dialect/SCF/transform-ops.mlir +++ b/mlir/test/Dialect/SCF/transform-ops.mlir @@ -16,12 +16,12 @@ } transform.sequence failures(propagate) { -^bb1(%arg1: !pdl.operation): - %0 = transform.structured.match ops{["arith.addi"]} in %arg1 : (!pdl.operation) -> !pdl.operation +^bb1(%arg1: !transform.any_op): + %0 = transform.structured.match ops{["arith.addi"]} in %arg1 : (!transform.any_op) -> !transform.any_op // CHECK: = transform.loop.get_parent_for - %1 = transform.loop.get_parent_for %0 : (!pdl.operation) -> !transform.op<"scf.for"> - %2 = transform.loop.get_parent_for %0 { num_loops = 2 } : (!pdl.operation) -> !transform.op<"scf.for"> - %3 = transform.loop.get_parent_for %0 { num_loops = 3 } : (!pdl.operation) -> !transform.op<"scf.for"> + %1 = transform.loop.get_parent_for %0 : (!transform.any_op) -> !transform.op<"scf.for"> + %2 = transform.loop.get_parent_for %0 { num_loops = 2 } : (!transform.any_op) -> !transform.op<"scf.for"> + %3 = transform.loop.get_parent_for %0 { num_loops = 3 } : (!transform.any_op) -> !transform.op<"scf.for"> transform.test_print_remark_at_operand %1, "third loop" : !transform.op<"scf.for"> transform.test_print_remark_at_operand %2, "second loop" : !transform.op<"scf.for"> transform.test_print_remark_at_operand %3, "first loop" : !transform.op<"scf.for"> @@ -36,10 +36,10 @@ } transform.sequence failures(propagate) { -^bb1(%arg1: !pdl.operation): - %0 = transform.structured.match ops{["arith.addi"]} in %arg1 : (!pdl.operation) -> !pdl.operation +^bb1(%arg1: !transform.any_op): + %0 = transform.structured.match ops{["arith.addi"]} in %arg1 : (!transform.any_op) -> !transform.any_op // expected-error @below {{could not find an 'scf.for' parent}} - %1 = transform.loop.get_parent_for %0 : (!pdl.operation) -> 
!transform.op<"scf.for"> + %1 = transform.loop.get_parent_for %0 : (!transform.any_op) -> !transform.op<"scf.for"> } // ----- @@ -104,10 +104,10 @@ } transform.sequence failures(propagate) { -^bb1(%arg1: !pdl.operation): - %0 = transform.structured.match ops{["arith.addi"]} in %arg1 : (!pdl.operation) -> !pdl.operation - %1 = transform.loop.get_parent_for %0 : (!pdl.operation) -> !transform.op<"scf.for"> - transform.loop.peel %1 : (!transform.op<"scf.for">) -> !pdl.operation +^bb1(%arg1: !transform.any_op): + %0 = transform.structured.match ops{["arith.addi"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.loop.get_parent_for %0 : (!transform.any_op) -> !transform.op<"scf.for"> + transform.loop.peel %1 : (!transform.op<"scf.for">) -> !transform.any_op } // ----- @@ -137,12 +137,12 @@ } transform.sequence failures(propagate) { -^bb1(%arg1: !pdl.operation): - %0 = transform.structured.match ops{["arith.addf"]} in %arg1 : (!pdl.operation) -> !pdl.operation - %1 = transform.loop.get_parent_for %0 : (!pdl.operation) -> !transform.op<"scf.for"> - %2 = transform.loop.pipeline %1 : (!transform.op<"scf.for">) -> !pdl.operation +^bb1(%arg1: !transform.any_op): + %0 = transform.structured.match ops{["arith.addf"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.loop.get_parent_for %0 : (!transform.any_op) -> !transform.op<"scf.for"> + %2 = transform.loop.pipeline %1 : (!transform.op<"scf.for">) -> !transform.any_op // Verify that the returned handle is usable. 
- transform.test_print_remark_at_operand %2, "transformed" : !pdl.operation + transform.test_print_remark_at_operand %2, "transformed" : !transform.any_op } // ----- @@ -161,9 +161,9 @@ } transform.sequence failures(propagate) { -^bb1(%arg1: !pdl.operation): - %0 = transform.structured.match ops{["arith.addi"]} in %arg1 : (!pdl.operation) -> !pdl.operation - %1 = transform.loop.get_parent_for %0 : (!pdl.operation) -> !transform.op<"scf.for"> +^bb1(%arg1: !transform.any_op): + %0 = transform.structured.match ops{["arith.addi"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.loop.get_parent_for %0 : (!transform.any_op) -> !transform.op<"scf.for"> transform.loop.unroll %1 { factor = 4 } : !transform.op<"scf.for"> } @@ -185,12 +185,12 @@ } transform.sequence failures(propagate) { -^bb1(%arg1: !pdl.operation): - %0 = transform.structured.match ops{["arith.addi"]} in %arg1 : (!pdl.operation) -> !pdl.operation +^bb1(%arg1: !transform.any_op): + %0 = transform.structured.match ops{["arith.addi"]} in %arg1 : (!transform.any_op) -> !transform.any_op // CHECK: = transform.loop.get_parent_for - %1 = transform.loop.get_parent_for %0 { affine = true } : (!pdl.operation) -> !transform.op<"affine.for"> - %2 = transform.loop.get_parent_for %0 { num_loops = 2, affine = true } : (!pdl.operation) -> !transform.op<"affine.for"> - %3 = transform.loop.get_parent_for %0 { num_loops = 3, affine = true } : (!pdl.operation) -> !transform.op<"affine.for"> + %1 = transform.loop.get_parent_for %0 { affine = true } : (!transform.any_op) -> !transform.op<"affine.for"> + %2 = transform.loop.get_parent_for %0 { num_loops = 2, affine = true } : (!transform.any_op) -> !transform.op<"affine.for"> + %3 = transform.loop.get_parent_for %0 { num_loops = 3, affine = true } : (!transform.any_op) -> !transform.op<"affine.for"> transform.test_print_remark_at_operand %1, "third loop" : !transform.op<"affine.for"> transform.test_print_remark_at_operand %2, "second loop" : 
!transform.op<"affine.for"> transform.test_print_remark_at_operand %3, "first loop" : !transform.op<"affine.for"> @@ -205,10 +205,10 @@ } transform.sequence failures(propagate) { -^bb1(%arg1: !pdl.operation): - %0 = transform.structured.match ops{["arith.addi"]} in %arg1 : (!pdl.operation) -> !pdl.operation +^bb1(%arg1: !transform.any_op): + %0 = transform.structured.match ops{["arith.addi"]} in %arg1 : (!transform.any_op) -> !transform.any_op // expected-error @below {{could not find an 'affine.for' parent}} - %1 = transform.loop.get_parent_for %0 { affine = true } : (!pdl.operation) -> !transform.op<"affine.for"> + %1 = transform.loop.get_parent_for %0 { affine = true } : (!transform.any_op) -> !transform.op<"affine.for"> } // ----- @@ -227,9 +227,9 @@ } transform.sequence failures(propagate) { -^bb1(%arg1: !pdl.operation): - %0 = transform.structured.match ops{["arith.addi"]} in %arg1 : (!pdl.operation) -> !pdl.operation - %1 = transform.loop.get_parent_for %0 { affine = true } : (!pdl.operation) -> !transform.op<"affine.for"> +^bb1(%arg1: !transform.any_op): + %0 = transform.structured.match ops{["arith.addi"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.loop.get_parent_for %0 { affine = true } : (!transform.any_op) -> !transform.op<"affine.for"> transform.test_print_remark_at_operand %1, "affine for loop" : !transform.op<"affine.for"> transform.loop.unroll %1 { factor = 4, affine = true } : !transform.op<"affine.for"> } @@ -252,9 +252,9 @@ } transform.sequence failures(propagate) { -^bb1(%arg1: !pdl.operation): - %0 = transform.structured.match ops{["arith.addi"]} in %arg1 : (!pdl.operation) -> !pdl.operation - %1 = transform.loop.get_parent_for %0 { num_loops = 1, affine = true } : (!pdl.operation) -> !transform.op<"affine.for"> +^bb1(%arg1: !transform.any_op): + %0 = transform.structured.match ops{["arith.addi"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.loop.get_parent_for %0 { num_loops = 1, affine = true 
} : (!transform.any_op) -> !transform.op<"affine.for"> transform.test_print_remark_at_operand %1, "affine for loop" : !transform.op<"affine.for"> transform.loop.unroll %1 { factor = 4 } : !transform.op<"affine.for"> } diff --git a/mlir/test/Dialect/Tensor/bufferize.mlir b/mlir/test/Dialect/Tensor/bufferize.mlir --- a/mlir/test/Dialect/Tensor/bufferize.mlir +++ b/mlir/test/Dialect/Tensor/bufferize.mlir @@ -582,3 +582,20 @@ // CHECK: return %[[r]] : tensor return %0 : tensor } + +// ----- + +// CHECK-LABEL: func @tensor.splat( +// CHECK-SAME: %[[F:.*]]: f32) +// CHECK-DAG: %[[ALLOC:.*]] = memref.alloc() {{.*}} : memref<10x2x4xf32> +// CHECK: %[[ALLOC_T:.*]] = bufferization.to_tensor %[[ALLOC]] +// CHECK: %[[MAPPED:.*]] = linalg.map +// CHECK: outs(%[[ALLOC_T]] : tensor<10x2x4xf32>) +// CHECK: linalg.yield %[[F]] +// CHECK: } +// CHECK: return %[[MAPPED]] : tensor<10x2x4xf32> +// CHECK: } +func.func @tensor.splat(%f: f32) -> tensor<10x2x4xf32> { + %t = tensor.splat %f : tensor<10x2x4xf32> + return %t : tensor<10x2x4xf32> +} diff --git a/mlir/test/Dialect/Tosa/canonicalize.mlir b/mlir/test/Dialect/Tosa/canonicalize.mlir --- a/mlir/test/Dialect/Tosa/canonicalize.mlir +++ b/mlir/test/Dialect/Tosa/canonicalize.mlir @@ -549,3 +549,13 @@ return %1 : tensor } +// ----- + +// CHECK-LABEL: @fold_abs_abs +func.func @fold_abs_abs(%arg0: tensor) -> tensor { + // CHECK: %[[ABS:.*]] = "tosa.abs"(%arg{{.*}}) : (tensor) -> tensor + // CHECK: return %[[ABS]] : tensor + %0 = "tosa.abs"(%arg0) : (tensor) -> tensor + %1 = "tosa.abs"(%0) : (tensor) -> tensor + return %1 : tensor +} diff --git a/mlir/test/Dialect/Transform/check-use-after-free.mlir b/mlir/test/Dialect/Transform/check-use-after-free.mlir --- a/mlir/test/Dialect/Transform/check-use-after-free.mlir +++ b/mlir/test/Dialect/Transform/check-use-after-free.mlir @@ -2,7 +2,7 @@ func.func @use_after_free_branching_control_flow() { // expected-note @below {{allocated here}} - %0 = 
transform.test_produce_self_handle_or_forward_operand + %0 = transform.test_produce_self_handle_or_forward_operand : () -> !transform.any_op transform.test_transform_op_with_regions { "transform.test_branching_transform_op_terminator"() : () -> () }, @@ -11,14 +11,14 @@ "transform.test_branching_transform_op_terminator"()[^bb1, ^bb2] : () -> () ^bb1: // expected-note @below {{freed here}} - transform.test_consume_operand_of_op_kind_or_fail %0, "transform.test_produce_self_handle_or_forward_operand" + transform.test_consume_operand_of_op_kind_or_fail %0, "transform.test_produce_self_handle_or_forward_operand" : !transform.any_op "transform.test_branching_transform_op_terminator"()[^bb3] : () -> () ^bb2: "transform.test_branching_transform_op_terminator"()[^bb3] : () -> () ^bb3: // expected-warning @below {{operand #0 may be used after free}} - transform.sequence %0 : !pdl.operation failures(propagate) { - ^bb0(%arg0: !pdl.operation): + transform.sequence %0 : !transform.any_op failures(propagate) { + ^bb0(%arg0: !transform.any_op): } "transform.test_branching_transform_op_terminator"() : () -> () } @@ -29,7 +29,7 @@ func.func @use_after_free_in_nested_op() { // expected-note @below {{allocated here}} - %0 = transform.test_produce_self_handle_or_forward_operand + %0 = transform.test_produce_self_handle_or_forward_operand : () -> !transform.any_op // expected-note @below {{freed here}} transform.test_transform_op_with_regions { "transform.test_branching_transform_op_terminator"() : () -> () @@ -38,7 +38,7 @@ ^bb0: "transform.test_branching_transform_op_terminator"()[^bb1, ^bb2] : () -> () ^bb1: - transform.test_consume_operand_of_op_kind_or_fail %0, "transform.test_produce_self_handle_or_forward_operand" + transform.test_consume_operand_of_op_kind_or_fail %0, "transform.test_produce_self_handle_or_forward_operand" : !transform.any_op "transform.test_branching_transform_op_terminator"()[^bb3] : () -> () ^bb2: "transform.test_branching_transform_op_terminator"()[^bb3] : 
() -> () @@ -46,8 +46,8 @@ "transform.test_branching_transform_op_terminator"() : () -> () } // expected-warning @below {{operand #0 may be used after free}} - transform.sequence %0 : !pdl.operation failures(propagate) { - ^bb0(%arg0: !pdl.operation): + transform.sequence %0 : !transform.any_op failures(propagate) { + ^bb0(%arg0: !transform.any_op): } return } @@ -56,29 +56,29 @@ func.func @use_after_free_recursive_side_effects() { transform.sequence failures(propagate) { - ^bb0(%arg0: !pdl.operation): + ^bb0(%arg0: !transform.any_op): // expected-note @below {{allocated here}} - %0 = transform.sequence %arg0 : !pdl.operation -> !pdl.operation failures(propagate) attributes { ord = 1 } { - ^bb1(%arg1: !pdl.operation): - yield %arg1 : !pdl.operation + %0 = transform.sequence %arg0 : !transform.any_op -> !transform.any_op failures(propagate) attributes { ord = 1 } { + ^bb1(%arg1: !transform.any_op): + yield %arg1 : !transform.any_op } - transform.sequence %0 : !pdl.operation failures(propagate) attributes { ord = 2 } { - ^bb2(%arg2: !pdl.operation): + transform.sequence %0 : !transform.any_op failures(propagate) attributes { ord = 2 } { + ^bb2(%arg2: !transform.any_op): } - transform.sequence %0 : !pdl.operation failures(propagate) attributes { ord = 3 } { - ^bb3(%arg3: !pdl.operation): + transform.sequence %0 : !transform.any_op failures(propagate) attributes { ord = 3 } { + ^bb3(%arg3: !transform.any_op): } // `transform.sequence` has recursive side effects so it has the same "free" // as the child op it contains. 
// expected-note @below {{freed here}} - transform.sequence %0 : !pdl.operation failures(propagate) attributes { ord = 4 } { - ^bb4(%arg4: !pdl.operation): - test_consume_operand_of_op_kind_or_fail %0, "transform.sequence" + transform.sequence %0 : !transform.any_op failures(propagate) attributes { ord = 4 } { + ^bb4(%arg4: !transform.any_op): + test_consume_operand_of_op_kind_or_fail %0, "transform.sequence" : !transform.any_op } // expected-warning @below {{operand #0 may be used after free}} - transform.sequence %0 : !pdl.operation failures(propagate) attributes { ord = 5 } { - ^bb3(%arg3: !pdl.operation): + transform.sequence %0 : !transform.any_op failures(propagate) attributes { ord = 5 } { + ^bb3(%arg3: !transform.any_op): } } return @@ -88,24 +88,24 @@ func.func @use_after_free() { transform.sequence failures(propagate) { - ^bb0(%arg0: !pdl.operation): + ^bb0(%arg0: !transform.any_op): // expected-note @below {{allocated here}} - %0 = transform.sequence %arg0 : !pdl.operation -> !pdl.operation failures(propagate) attributes { ord = 1 } { - ^bb1(%arg1: !pdl.operation): - yield %arg1 : !pdl.operation + %0 = transform.sequence %arg0 : !transform.any_op -> !transform.any_op failures(propagate) attributes { ord = 1 } { + ^bb1(%arg1: !transform.any_op): + yield %arg1 : !transform.any_op } - transform.sequence %0 : !pdl.operation failures(propagate) attributes { ord = 2 } { - ^bb2(%arg2: !pdl.operation): + transform.sequence %0 : !transform.any_op failures(propagate) attributes { ord = 2 } { + ^bb2(%arg2: !transform.any_op): } - transform.sequence %0 : !pdl.operation failures(propagate) attributes { ord = 3 } { - ^bb3(%arg3: !pdl.operation): + transform.sequence %0 : !transform.any_op failures(propagate) attributes { ord = 3 } { + ^bb3(%arg3: !transform.any_op): } // expected-note @below {{freed here}} - test_consume_operand_of_op_kind_or_fail %0, "transform.sequence" + test_consume_operand_of_op_kind_or_fail %0, "transform.sequence" : !transform.any_op // 
expected-warning @below {{operand #0 may be used after free}} - transform.sequence %0 : !pdl.operation failures(propagate) attributes { ord = 5 } { - ^bb3(%arg3: !pdl.operation): + transform.sequence %0 : !transform.any_op failures(propagate) attributes { ord = 5 } { + ^bb3(%arg3: !transform.any_op): } } return @@ -118,7 +118,7 @@ // be reported as use-after-free. func.func @use_after_free_self_cycle() { // expected-note @below {{allocated here}} - %0 = transform.test_produce_self_handle_or_forward_operand + %0 = transform.test_produce_self_handle_or_forward_operand : () -> !transform.any_op transform.test_transform_op_with_regions { "transform.test_branching_transform_op_terminator"() : () -> () }, @@ -127,12 +127,12 @@ "transform.test_branching_transform_op_terminator"()[^bb1] : () -> () ^bb1: // expected-warning @below {{operand #0 may be used after free}} - transform.sequence %0 : !pdl.operation failures(propagate) { - ^bb0(%arg0: !pdl.operation): + transform.sequence %0 : !transform.any_op failures(propagate) { + ^bb0(%arg0: !transform.any_op): } // expected-warning @below {{operand #0 may be used after free}} // expected-note @below {{freed here}} - transform.test_consume_operand_of_op_kind_or_fail %0, "transform.test_produce_self_handle_or_forward_operand" + transform.test_consume_operand_of_op_kind_or_fail %0, "transform.test_produce_self_handle_or_forward_operand" : !transform.any_op "transform.test_branching_transform_op_terminator"()[^bb1, ^bb2] : () -> () ^bb2: "transform.test_branching_transform_op_terminator"() : () -> () @@ -147,7 +147,7 @@ // use-after-free. 
func.func @use_after_free_cycle() { // expected-note @below {{allocated here}} - %0 = transform.test_produce_self_handle_or_forward_operand + %0 = transform.test_produce_self_handle_or_forward_operand : () -> !transform.any_op transform.test_transform_op_with_regions { "transform.test_branching_transform_op_terminator"() : () -> () }, @@ -157,7 +157,7 @@ ^bb1: // expected-warning @below {{operand #0 may be used after free}} // expected-note @below {{freed here}} - transform.test_consume_operand_of_op_kind_or_fail %0, "transform.test_produce_self_handle_or_forward_operand" + transform.test_consume_operand_of_op_kind_or_fail %0, "transform.test_produce_self_handle_or_forward_operand" : !transform.any_op "transform.test_branching_transform_op_terminator"()[^bb2, ^bb3] : () -> () ^bb2: "transform.test_branching_transform_op_terminator"()[^bb1] : () -> () @@ -172,8 +172,8 @@ // This should not crash. transform.sequence failures(propagate) { -^bb0(%arg0: !pdl.operation): - alternatives %arg0 : !pdl.operation { - ^bb0(%arg1: !pdl.operation): +^bb0(%arg0: !transform.any_op): + alternatives %arg0 : !transform.any_op { + ^bb0(%arg1: !transform.any_op): } } diff --git a/mlir/test/Dialect/Transform/expensive-checks.mlir b/mlir/test/Dialect/Transform/expensive-checks.mlir --- a/mlir/test/Dialect/Transform/expensive-checks.mlir +++ b/mlir/test/Dialect/Transform/expensive-checks.mlir @@ -7,7 +7,7 @@ } transform.with_pdl_patterns { -^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): pdl.pattern @return : benefit(1) { %0 = operands %1 = types @@ -15,15 +15,15 @@ rewrite %2 with "transform.dialect" } - sequence %arg0 : !pdl.operation failures(propagate) { - ^bb1(%arg1: !pdl.operation): + sequence %arg0 : !transform.any_op failures(propagate) { + ^bb1(%arg1: !transform.any_op): // expected-note @below {{handle to invalidated ops}} - %0 = pdl_match @return in %arg1 : (!pdl.operation) -> !pdl.operation - %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> 
!pdl.operation + %0 = pdl_match @return in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = get_closest_isolated_parent %0 : (!transform.any_op) -> !transform.any_op // expected-note @below {{invalidated by this transform op that consumes its operand #0}} - test_consume_operand %1 : !pdl.operation + test_consume_operand %1 : !transform.any_op // expected-error @below {{op uses a handle invalidated by a previously executed transform op}} - test_print_remark_at_operand %0, "remark" : !pdl.operation + test_print_remark_at_operand %0, "remark" : !transform.any_op } } @@ -36,7 +36,7 @@ func.func private @func2() transform.with_pdl_patterns { -^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): pdl.pattern @func : benefit(1) { %0 = operands %1 = types @@ -50,14 +50,14 @@ rewrite %2 with "transform.dialect" } - sequence %arg0 : !pdl.operation failures(propagate) { - ^bb1(%arg1: !pdl.operation): - %0 = pdl_match @func in %arg1 : (!pdl.operation) -> !pdl.operation - %1 = pdl_match @return in %arg1 : (!pdl.operation) -> !pdl.operation - %2 = replicate num(%0) %1 : !pdl.operation, !pdl.operation + sequence %arg0 : !transform.any_op failures(propagate) { + ^bb1(%arg1: !transform.any_op): + %0 = pdl_match @func in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = pdl_match @return in %arg1 : (!transform.any_op) -> !transform.any_op + %2 = replicate num(%0) %1 : !transform.any_op, !transform.any_op // expected-error @below {{a handle passed as operand #0 and consumed by this operation points to a payload entity more than once}} - test_consume_operand %2 : !pdl.operation - test_print_remark_at_operand %0, "remark" : !pdl.operation + test_consume_operand %2 : !transform.any_op + test_print_remark_at_operand %0, "remark" : !transform.any_op } } @@ -69,14 +69,14 @@ module { transform.sequence failures(propagate) { - ^bb0(%0: !pdl.operation): - %1 = transform.test_copy_payload %0 + ^bb0(%0: !transform.any_op): + %1 = transform.test_copy_payload %0 : 
(!transform.any_op) -> !transform.any_op // expected-note @below {{handle to invalidated ops}} - %2 = transform.test_copy_payload %0 + %2 = transform.test_copy_payload %0 : (!transform.any_op) ->!transform.any_op // expected-note @below {{invalidated by this transform op that consumes its operand #0}} - transform.test_consume_operand %1 : !pdl.operation + transform.test_consume_operand %1 : !transform.any_op // expected-error @below {{op uses a handle invalidated by a previously executed transform op}} - transform.test_consume_operand %2 : !pdl.operation + transform.test_consume_operand %2 : !transform.any_op } } @@ -87,16 +87,16 @@ module { transform.sequence failures(propagate) { - ^bb0(%0: !pdl.operation): - %1 = transform.test_copy_payload %0 + ^bb0(%0: !transform.any_op): + %1 = transform.test_copy_payload %0 : (!transform.any_op) -> !transform.any_op // expected-note @below {{handle to invalidated ops}} - %2 = transform.test_copy_payload %0 + %2 = transform.test_copy_payload %0 : (!transform.any_op) -> !transform.any_op // Consuming two handles in the same operation is invalid if they point // to overlapping sets of payload IR ops. 
// // expected-error @below {{op uses a handle invalidated by a previously executed transform op}} // expected-note @below {{invalidated by this transform op that consumes its operand #0 and invalidates all handles to payload IR entities}} - transform.test_consume_operand %1, %2 : !pdl.operation + transform.test_consume_operand %1, %2 : !transform.any_op, !transform.any_op } } @@ -107,10 +107,10 @@ module { transform.sequence failures(propagate) { - ^bb0(%0: !pdl.operation): - %1 = transform.test_copy_payload %0 - %2 = transform.test_copy_payload %0 - transform.merge_handles %1, %2 { deduplicate } : !pdl.operation + ^bb0(%0: !transform.any_op): + %1 = transform.test_copy_payload %0 : (!transform.any_op) -> !transform.any_op + %2 = transform.test_copy_payload %0 : (!transform.any_op) -> !transform.any_op + transform.merge_handles %1, %2 { deduplicate } : !transform.any_op } } // ----- diff --git a/mlir/test/Dialect/Transform/multi-arg-top-level-params.mlir b/mlir/test/Dialect/Transform/multi-arg-top-level-params.mlir --- a/mlir/test/Dialect/Transform/multi-arg-top-level-params.mlir +++ b/mlir/test/Dialect/Transform/multi-arg-top-level-params.mlir @@ -2,7 +2,7 @@ // RUN: --split-input-file --verify-diagnostics transform.sequence failures(propagate) { -^bb0(%arg0: !pdl.operation, %arg1: !transform.param, %arg2: !transform.param): +^bb0(%arg0: !transform.any_op, %arg1: !transform.param, %arg2: !transform.param): // expected-remark @below {{1 : i64, 2 : i64, 3 : i64}} transform.test_print_param %arg1 : !transform.param // expected-remark @below {{42 : i64, 45 : i64}} @@ -12,7 +12,7 @@ // ----- transform.sequence failures(propagate) { -^bb0(%arg0: !pdl.operation, %arg1: !transform.any_op, %arg2: !transform.param): +^bb0(%arg0: !transform.any_op, %arg1: !transform.any_op, %arg2: !transform.param): // expected-error @above {{wrong kind of value provided for top-level operation handle}} } @@ -20,5 +20,5 @@ // expected-error @below {{operation expects 3 extra value bindings, 
but 2 were provided to the interpreter}} transform.sequence failures(propagate) { -^bb0(%arg0: !pdl.operation, %arg1: !transform.param, %arg2: !transform.param, %arg3: !transform.param): +^bb0(%arg0: !transform.any_op, %arg1: !transform.param, %arg2: !transform.param, %arg3: !transform.param): } diff --git a/mlir/test/Dialect/Transform/ops-invalid.mlir b/mlir/test/Dialect/Transform/ops-invalid.mlir --- a/mlir/test/Dialect/Transform/ops-invalid.mlir +++ b/mlir/test/Dialect/Transform/ops-invalid.mlir @@ -15,10 +15,10 @@ // expected-note @below {{nested in another possible top-level op}} transform.sequence failures(propagate) { -^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): // expected-error @below {{expects operands to be provided for a nested op}} transform.sequence failures(propagate) { - ^bb1(%arg1: !pdl.operation): + ^bb1(%arg1: !transform.any_op): } } @@ -34,7 +34,7 @@ // expected-error @below {{expected children ops to implement TransformOpInterface}} transform.sequence failures(propagate) { -^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): // expected-note @below {{op without interface}} arith.constant 42.0 : f32 } @@ -42,8 +42,8 @@ // ----- // expected-error @below {{expects the types of the terminator operands to match the types of the result}} -%0 = transform.sequence -> !pdl.operation failures(propagate) { -^bb0(%arg0: !pdl.operation): +%0 = transform.sequence -> !transform.any_op failures(propagate) { +^bb0(%arg0: !transform.any_op): // expected-note @below {{terminator}} transform.yield } @@ -54,7 +54,7 @@ ^bb0(%arg0: !transform.any_op): // expected-error @below {{expects the type of the block argument to match the type of the operand}} transform.sequence %arg0: !transform.any_op failures(propagate) { - ^bb1(%arg1: !pdl.operation): + ^bb1(%arg1: !transform.op<"builtin.module">): transform.yield } } @@ -82,10 +82,10 @@ // expected-note @below {{nested in another possible top-level op}} transform.with_pdl_patterns { 
-^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): // expected-error @below {{expects operands to be provided for a nested op}} transform.sequence failures(propagate) { - ^bb1(%arg1: !pdl.operation): + ^bb1(%arg1: !transform.any_op): } } @@ -93,14 +93,14 @@ // expected-error @below {{expects only one non-pattern op in its body}} transform.with_pdl_patterns { -^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): // expected-note @below {{first non-pattern op}} transform.sequence failures(propagate) { - ^bb1(%arg1: !pdl.operation): + ^bb1(%arg1: !transform.any_op): } // expected-note @below {{second non-pattern op}} transform.sequence failures(propagate) { - ^bb1(%arg1: !pdl.operation): + ^bb1(%arg1: !transform.any_op): } } @@ -108,7 +108,7 @@ // expected-error @below {{expects only pattern and top-level transform ops in its body}} transform.with_pdl_patterns { -^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): // expected-note @below {{offending op}} "test.something"() : () -> () } @@ -117,10 +117,10 @@ // expected-note @below {{parent operation}} transform.with_pdl_patterns { -^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): // expected-error @below {{op cannot be nested}} - transform.with_pdl_patterns %arg0 : !pdl.operation { - ^bb1(%arg1: !pdl.operation): + transform.with_pdl_patterns %arg0 : !transform.any_op { + ^bb1(%arg1: !transform.any_op): } } @@ -128,7 +128,7 @@ // expected-error @below {{op expects at least one non-pattern op}} transform.with_pdl_patterns { -^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): pdl.pattern @some : benefit(1) { %0 = pdl.operation "test.foo" pdl.rewrite %0 with "transform.dialect" @@ -138,10 +138,10 @@ // ----- transform.sequence failures(propagate) { -^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): // expected-error @below {{op expects at least one non-pattern op}} - with_pdl_patterns %arg0 : !pdl.operation { - ^bb1(%arg1: !pdl.operation): + 
with_pdl_patterns %arg0 : !transform.any_op { + ^bb1(%arg1: !transform.any_op): } } @@ -155,7 +155,7 @@ // expected-error @below {{expects a single-block region}} "transform.test_transform_unrestricted_op_no_interface"() ({ -^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): "test.potential_terminator"() : () -> () ^bb1: "test.potential_terminator"() : () -> () @@ -164,59 +164,59 @@ // ----- transform.sequence failures(propagate) { -^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): // expected-error @below {{result #0 has more than one potential consumer}} - %0 = test_produce_self_handle_or_forward_operand + %0 = test_produce_self_handle_or_forward_operand : () -> !transform.any_op // expected-note @below {{used here as operand #0}} - test_consume_operand_of_op_kind_or_fail %0, "transform.test_produce_self_handle_or_forward_operand" + test_consume_operand_of_op_kind_or_fail %0, "transform.test_produce_self_handle_or_forward_operand" : !transform.any_op // expected-note @below {{used here as operand #0}} - test_consume_operand_of_op_kind_or_fail %0, "transform.test_produce_self_handle_or_forward_operand" + test_consume_operand_of_op_kind_or_fail %0, "transform.test_produce_self_handle_or_forward_operand" : !transform.any_op } // ----- transform.sequence failures(propagate) { -^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): // expected-error @below {{result #0 has more than one potential consumer}} - %0 = test_produce_self_handle_or_forward_operand + %0 = test_produce_self_handle_or_forward_operand : () -> !transform.any_op // expected-note @below {{used here as operand #0}} - test_consume_operand_of_op_kind_or_fail %0, "transform.test_produce_self_handle_or_forward_operand" + test_consume_operand_of_op_kind_or_fail %0, "transform.test_produce_self_handle_or_forward_operand" : !transform.any_op // expected-note @below {{used here as operand #0}} - transform.sequence %0 : !pdl.operation failures(propagate) { - ^bb1(%arg1: 
!pdl.operation): - test_consume_operand_of_op_kind_or_fail %arg1, "transform.test_produce_self_handle_or_forward_operand" + transform.sequence %0 : !transform.any_op failures(propagate) { + ^bb1(%arg1: !transform.any_op): + test_consume_operand_of_op_kind_or_fail %arg1, "transform.test_produce_self_handle_or_forward_operand" : !transform.any_op } } // ----- transform.sequence failures(propagate) { -^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): // expected-error @below {{result #0 has more than one potential consumer}} - %0 = test_produce_self_handle_or_forward_operand + %0 = test_produce_self_handle_or_forward_operand : () -> !transform.any_op // expected-note @below {{used here as operand #0}} - test_consume_operand_of_op_kind_or_fail %0, "transform.test_produce_self_handle_or_forward_operand" - transform.sequence %0 : !pdl.operation failures(propagate) { - ^bb1(%arg1: !pdl.operation): + test_consume_operand_of_op_kind_or_fail %0, "transform.test_produce_self_handle_or_forward_operand" : !transform.any_op + transform.sequence %0 : !transform.any_op failures(propagate) { + ^bb1(%arg1: !transform.any_op): // expected-note @below {{used here as operand #0}} - test_consume_operand_of_op_kind_or_fail %0, "transform.test_produce_self_handle_or_forward_operand" + test_consume_operand_of_op_kind_or_fail %0, "transform.test_produce_self_handle_or_forward_operand" : !transform.any_op } } // ----- transform.sequence failures(propagate) { -^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): // expected-error @below {{result #0 has more than one potential consumer}} - %0 = test_produce_self_handle_or_forward_operand + %0 = test_produce_self_handle_or_forward_operand : () -> !transform.any_op // expected-note @below {{used here as operand #0}} - test_consume_operand_of_op_kind_or_fail %0, "transform.test_produce_self_handle_or_forward_operand" + test_consume_operand_of_op_kind_or_fail %0, "transform.test_produce_self_handle_or_forward_operand" : 
!transform.any_op // expected-note @below {{used here as operand #0}} - transform.sequence %0 : !pdl.operation failures(propagate) { - ^bb1(%arg1: !pdl.operation): - transform.sequence %arg1 : !pdl.operation failures(propagate) { - ^bb2(%arg2: !pdl.operation): - test_consume_operand_of_op_kind_or_fail %arg2, "transform.test_produce_self_handle_or_forward_operand" + transform.sequence %0 : !transform.any_op failures(propagate) { + ^bb1(%arg1: !transform.any_op): + transform.sequence %arg1 : !transform.any_op failures(propagate) { + ^bb2(%arg2: !transform.any_op): + test_consume_operand_of_op_kind_or_fail %arg2, "transform.test_produce_self_handle_or_forward_operand" : !transform.any_op } } } @@ -224,7 +224,7 @@ // ----- transform.sequence failures(propagate) { -^bb1(%arg1: !pdl.operation): +^bb1(%arg1: !transform.any_op): // expected-error @below {{expects at least one region}} transform.alternatives } @@ -232,13 +232,13 @@ // ----- transform.sequence failures(propagate) { -^bb1(%arg1: !pdl.operation): +^bb1(%arg1: !transform.any_op): // expected-error @below {{expects terminator operands to have the same type as results of the operation}} - %2 = transform.alternatives %arg1 : !pdl.operation -> !pdl.operation { - ^bb2(%arg2: !pdl.operation): - transform.yield %arg2 : !pdl.operation + %2 = transform.alternatives %arg1 : !transform.any_op -> !transform.any_op { + ^bb2(%arg2: !transform.any_op): + transform.yield %arg2 : !transform.any_op }, { - ^bb2(%arg2: !pdl.operation): + ^bb2(%arg2: !transform.any_op): // expected-note @below {{terminator}} transform.yield } @@ -255,16 +255,16 @@ // ----- transform.sequence failures(propagate) { -^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): // expected-error @below {{result #0 has more than one potential consumer}} - %0 = test_produce_self_handle_or_forward_operand + %0 = test_produce_self_handle_or_forward_operand : () -> !transform.any_op // expected-note @below {{used here as operand #0}} - transform.foreach %0 
: !pdl.operation { - ^bb1(%arg1: !pdl.operation): - transform.test_consume_operand %arg1 : !pdl.operation + transform.foreach %0 : !transform.any_op { + ^bb1(%arg1: !transform.any_op): + transform.test_consume_operand %arg1 : !transform.any_op } // expected-note @below {{used here as operand #0}} - transform.test_consume_operand %0 : !pdl.operation + transform.test_consume_operand %0 : !transform.any_op } // ----- diff --git a/mlir/test/Dialect/Transform/ops.mlir b/mlir/test/Dialect/Transform/ops.mlir --- a/mlir/test/Dialect/Transform/ops.mlir +++ b/mlir/test/Dialect/Transform/ops.mlir @@ -1,42 +1,42 @@ // RUN: mlir-opt %s | mlir-opt | FileCheck %s // CHECK: transform.sequence -// CHECK: ^{{.+}}(%{{.+}}: !pdl.operation): +// CHECK: ^{{.+}}(%{{.+}}: !transform.any_op): transform.sequence failures(propagate) { -^bb0(%arg0: !pdl.operation): - // CHECK: sequence %{{.+}} : !pdl.operation - // CHECK: ^{{.+}}(%{{.+}}: !pdl.operation): - sequence %arg0 : !pdl.operation failures(propagate) { - ^bb1(%arg1: !pdl.operation): +^bb0(%arg0: !transform.any_op): + // CHECK: sequence %{{.+}} : !transform.any_op + // CHECK: ^{{.+}}(%{{.+}}: !transform.any_op): + sequence %arg0 : !transform.any_op failures(propagate) { + ^bb1(%arg1: !transform.any_op): } } // CHECK: transform.with_pdl_patterns -// CHECK: ^{{.+}}(%[[ARG:.+]]: !pdl.operation): +// CHECK: ^{{.+}}(%[[ARG:.+]]: !transform.any_op): transform.with_pdl_patterns { -^bb0(%arg0: !pdl.operation): - // CHECK: sequence %[[ARG]] : !pdl.operation - sequence %arg0 : !pdl.operation failures(propagate) { - ^bb1(%arg1: !pdl.operation): +^bb0(%arg0: !transform.any_op): + // CHECK: sequence %[[ARG]] : !transform.any_op + sequence %arg0 : !transform.any_op failures(propagate) { + ^bb1(%arg1: !transform.any_op): } } // Using the same value multiple times without consuming it is fine. 
// CHECK: transform.sequence -// CHECK: %[[V:.+]] = sequence %{{.*}} : !pdl.operation -> !pdl.operation +// CHECK: %[[V:.+]] = sequence %{{.*}} : !transform.any_op -> !transform.any_op // CHECK: sequence %[[V]] // CHECK: sequence %[[V]] transform.sequence failures(propagate) { -^bb0(%arg0: !pdl.operation): - %0 = transform.sequence %arg0 : !pdl.operation -> !pdl.operation failures(propagate) { - ^bb1(%arg1: !pdl.operation): - yield %arg1 : !pdl.operation +^bb0(%arg0: !transform.any_op): + %0 = transform.sequence %arg0 : !transform.any_op -> !transform.any_op failures(propagate) { + ^bb1(%arg1: !transform.any_op): + yield %arg1 : !transform.any_op } - transform.sequence %0 : !pdl.operation failures(propagate) { - ^bb2(%arg2: !pdl.operation): + transform.sequence %0 : !transform.any_op failures(propagate) { + ^bb2(%arg2: !transform.any_op): } - transform.sequence %0 : !pdl.operation failures(propagate) { - ^bb3(%arg3: !pdl.operation): + transform.sequence %0 : !transform.any_op failures(propagate) { + ^bb3(%arg3: !transform.any_op): } } @@ -70,17 +70,17 @@ // CHECK: transform.sequence // CHECK: foreach transform.sequence failures(propagate) { -^bb0(%arg0: !pdl.operation): - transform.foreach %arg0 : !pdl.operation { - ^bb1(%arg1: !pdl.operation): +^bb0(%arg0: !transform.any_op): + transform.foreach %arg0 : !transform.any_op { + ^bb1(%arg1: !transform.any_op): } } // CHECK: transform.sequence transform.sequence failures(propagate) { -^bb0(%arg0: !pdl.operation): - // CHECK: cast %{{.*}} : !pdl.operation to !transform.any_op - %0 = cast %arg0: !pdl.operation to !transform.any_op +^bb0(%arg0: !transform.any_op): + // CHECK: cast %{{.*}} : !transform.any_op to !transform.any_op + %0 = cast %arg0: !transform.any_op to !transform.any_op // CHECK: cast %{{.*}} : !transform.any_op to !transform.op<"builtin.module"> %1 = cast %0: !transform.any_op to !transform.op<"builtin.module"> } @@ -91,9 +91,9 @@ // CHECK: print // CHECK: print transform.sequence failures(propagate) { 
-^bb0(%arg0: !pdl.operation): - transform.print %arg0 : !pdl.operation +^bb0(%arg0: !transform.any_op): + transform.print %arg0 : !transform.any_op transform.print - transform.print %arg0 {name = "test"} : !pdl.operation + transform.print %arg0 {name = "test"} : !transform.any_op transform.print {name = "test"} } diff --git a/mlir/test/Dialect/Transform/test-dialect-injection.mlir b/mlir/test/Dialect/Transform/test-dialect-injection.mlir --- a/mlir/test/Dialect/Transform/test-dialect-injection.mlir +++ b/mlir/test/Dialect/Transform/test-dialect-injection.mlir @@ -7,11 +7,11 @@ transform.test_transform_op // CHECK: = transform.test_produce_self_handle_or_forward_operand {foo = "bar"} -%0 = transform.test_produce_self_handle_or_forward_operand { foo = "bar" } +%0 = transform.test_produce_self_handle_or_forward_operand { foo = "bar" } : () -> !transform.any_op // CHECK: transform.test_consume_operand_of_op_kind_or_fail %{{.*}}, -transform.test_consume_operand_of_op_kind_or_fail %0, "transform.test_produce_self_handle_or_forward_operand" +transform.test_consume_operand_of_op_kind_or_fail %0, "transform.test_produce_self_handle_or_forward_operand" : !transform.any_op // Ensure that the extension type is roundtripped correctly. 
-// CHECK: transform.cast %{{.*}} : !pdl.operation to !transform.test_dialect_op -%1 = transform.cast %0: !pdl.operation to !transform.test_dialect_op +// CHECK: transform.cast %{{.*}} : !transform.any_op to !transform.test_dialect_op +%1 = transform.cast %0: !transform.any_op to !transform.test_dialect_op diff --git a/mlir/test/Dialect/Transform/test-interpreter.mlir b/mlir/test/Dialect/Transform/test-interpreter.mlir --- a/mlir/test/Dialect/Transform/test-interpreter.mlir +++ b/mlir/test/Dialect/Transform/test-interpreter.mlir @@ -10,18 +10,18 @@ transform.sequence failures(propagate) { ^bb0(%arg0: !transform.any_op): - %0 = transform.test_produce_self_handle_or_forward_operand { foo = "bar" } + %0 = transform.test_produce_self_handle_or_forward_operand { foo = "bar" } : () -> !transform.any_op // expected-remark @below {{succeeded}} - transform.test_consume_operand_of_op_kind_or_fail %0, "transform.test_produce_self_handle_or_forward_operand" + transform.test_consume_operand_of_op_kind_or_fail %0, "transform.test_produce_self_handle_or_forward_operand" : !transform.any_op } // ----- transform.sequence failures(propagate) { ^bb0(%arg0: !transform.any_op): - %0 = transform.test_produce_self_handle_or_forward_operand { foo = "bar" } + %0 = transform.test_produce_self_handle_or_forward_operand { foo = "bar" } : () -> !transform.any_op // expected-error @below {{expected the operand to be associated a payload op of kind transform.sequence got transform.test_produce_self_handle_or_forward_operand}} - transform.test_consume_operand_of_op_kind_or_fail %0, "transform.sequence" + transform.test_consume_operand_of_op_kind_or_fail %0, "transform.sequence" : !transform.any_op } // ----- @@ -31,18 +31,18 @@ // to detect double-consumption. 
transform.sequence failures(propagate) { ^bb0(%arg0: !transform.any_op): - %0 = transform.test_produce_self_handle_or_forward_operand { foo = "bar" } - %1 = transform.test_copy_payload %0 + %0 = transform.test_produce_self_handle_or_forward_operand { foo = "bar" } : () -> !transform.any_op + %1 = transform.test_copy_payload %0 : (!transform.any_op) -> !transform.any_op // expected-remark @below {{succeeded}} - transform.test_consume_operand_of_op_kind_or_fail %0, "transform.test_produce_self_handle_or_forward_operand" + transform.test_consume_operand_of_op_kind_or_fail %0, "transform.test_produce_self_handle_or_forward_operand" : !transform.any_op } // ----- transform.sequence failures(propagate) { -^bb0(%arg0: !pdl.operation): - sequence %arg0 : !pdl.operation failures(propagate) { - ^bb0(%arg1: !pdl.operation): +^bb0(%arg0: !transform.any_op): + sequence %arg0 : !transform.any_op failures(propagate) { + ^bb0(%arg1: !transform.any_op): // expected-remark @below {{applying transformation "a"}} test_transform_op "a" // expected-remark @below {{applying transformation "b"}} @@ -59,36 +59,36 @@ // ----- transform.sequence failures(propagate) { -^bb0(%arg0: !pdl.operation): - %0 = test_produce_self_handle_or_forward_operand - sequence %0 : !pdl.operation failures(propagate) { - ^bb0(%arg1: !pdl.operation): +^bb0(%arg0: !transform.any_op): + %0 = test_produce_self_handle_or_forward_operand : () -> !transform.any_op + sequence %0 : !transform.any_op failures(propagate) { + ^bb0(%arg1: !transform.any_op): // expected-remark @below {{succeeded}} - test_consume_operand_of_op_kind_or_fail %arg1, "transform.test_produce_self_handle_or_forward_operand" + test_consume_operand_of_op_kind_or_fail %arg1, "transform.test_produce_self_handle_or_forward_operand" : !transform.any_op } } // ----- transform.sequence failures(propagate) { -^bb0(%arg0: !pdl.operation): - %0 = sequence %arg0 : !pdl.operation -> !pdl.operation failures(propagate) { - ^bb0(%arg1: !pdl.operation): - %1 = 
test_produce_self_handle_or_forward_operand - yield %1 : !pdl.operation +^bb0(%arg0: !transform.any_op): + %0 = sequence %arg0 : !transform.any_op -> !transform.any_op failures(propagate) { + ^bb0(%arg1: !transform.any_op): + %1 = test_produce_self_handle_or_forward_operand : () -> !transform.any_op + yield %1 : !transform.any_op } // expected-remark @below {{succeeded}} - test_consume_operand_of_op_kind_or_fail %0, "transform.test_produce_self_handle_or_forward_operand" + test_consume_operand_of_op_kind_or_fail %0, "transform.test_produce_self_handle_or_forward_operand" : !transform.any_op } // ----- transform.with_pdl_patterns { -^bb0(%arg0: !pdl.operation): - sequence %arg0 : !pdl.operation failures(propagate) { - ^bb0(%arg1: !pdl.operation): - %0 = pdl_match @some in %arg1 : (!pdl.operation) -> !pdl.operation - test_print_remark_at_operand %0, "matched" : !pdl.operation +^bb0(%arg0: !transform.any_op): + sequence %arg0 : !transform.any_op failures(propagate) { + ^bb0(%arg1: !transform.any_op): + %0 = pdl_match @some in %arg1 : (!transform.any_op) -> !transform.any_op + test_print_remark_at_operand %0, "matched" : !transform.any_op } pdl.pattern @some : benefit(1) { @@ -124,18 +124,18 @@ } transform.with_pdl_patterns { -^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): pdl.pattern @const : benefit(1) { %r = pdl.types %0 = pdl.operation "arith.constant" -> (%r : !pdl.range) pdl.rewrite %0 with "transform.dialect" } - transform.sequence %arg0 : !pdl.operation failures(propagate) { - ^bb1(%arg1: !pdl.operation): - %f = pdl_match @const in %arg1 : (!pdl.operation) -> !pdl.operation - %m = get_closest_isolated_parent %f : (!pdl.operation) -> !pdl.operation - test_print_remark_at_operand %m, "parent function" : !pdl.operation + transform.sequence %arg0 : !transform.any_op failures(propagate) { + ^bb1(%arg1: !transform.any_op): + %f = pdl_match @const in %arg1 : (!transform.any_op) -> !transform.any_op + %m = get_closest_isolated_parent %f : 
(!transform.any_op) -> !transform.any_op + test_print_remark_at_operand %m, "parent function" : !transform.any_op } } @@ -147,7 +147,7 @@ } transform.with_pdl_patterns { -^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): pdl.pattern @match_func : benefit(1) { %0 = pdl.operands %1 = pdl.types @@ -155,22 +155,22 @@ pdl.rewrite %2 with "transform.dialect" } - transform.sequence %arg0 : !pdl.operation failures(propagate) { - ^bb1(%arg1: !pdl.operation): + transform.sequence %arg0 : !transform.any_op failures(propagate) { + ^bb1(%arg1: !transform.any_op): // This is necessary to run the transformation on something other than the // top-level module, "alternatives" cannot be run on that. - %0 = pdl_match @match_func in %arg1 : (!pdl.operation) -> !pdl.operation - transform.alternatives %0 : !pdl.operation { - ^bb2(%arg2: !pdl.operation): - %1 = transform.test_produce_self_handle_or_forward_operand + %0 = pdl_match @match_func in %arg1 : (!transform.any_op) -> !transform.any_op + transform.alternatives %0 : !transform.any_op { + ^bb2(%arg2: !transform.any_op): + %1 = transform.test_produce_self_handle_or_forward_operand : () -> !transform.any_op // This operation fails, which triggers the next alternative without // reporting the error. 
- transform.test_consume_operand_of_op_kind_or_fail %1, "transform.sequence" + transform.test_consume_operand_of_op_kind_or_fail %1, "transform.sequence" : !transform.any_op }, { - ^bb2(%arg2: !pdl.operation): - %1 = transform.test_produce_self_handle_or_forward_operand + ^bb2(%arg2: !transform.any_op): + %1 = transform.test_produce_self_handle_or_forward_operand : () -> !transform.any_op // expected-remark @below {{succeeded}} - transform.test_consume_operand_of_op_kind_or_fail %1, "transform.test_produce_self_handle_or_forward_operand" + transform.test_consume_operand_of_op_kind_or_fail %1, "transform.test_produce_self_handle_or_forward_operand" : !transform.any_op } } } @@ -185,7 +185,7 @@ } transform.with_pdl_patterns { -^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): pdl.pattern @match_call : benefit(1) { %0 = pdl.operands %1 = pdl.types @@ -193,16 +193,16 @@ pdl.rewrite %2 with "transform.dialect" } - transform.sequence %arg0 : !pdl.operation failures(propagate) { - ^bb1(%arg1: !pdl.operation): - %0 = pdl_match @match_call in %arg1 : (!pdl.operation) -> !pdl.operation - %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation + transform.sequence %arg0 : !transform.any_op failures(propagate) { + ^bb1(%arg1: !transform.any_op): + %0 = pdl_match @match_call in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = get_closest_isolated_parent %0 : (!transform.any_op) -> !transform.any_op // expected-error @below {{all alternatives failed}} - transform.alternatives %1 : !pdl.operation { - ^bb2(%arg2: !pdl.operation): - %2 = transform.pdl_match @match_call in %arg2 : (!pdl.operation) -> !pdl.operation + transform.alternatives %1 : !transform.any_op { + ^bb2(%arg2: !transform.any_op): + %2 = transform.pdl_match @match_call in %arg2 : (!transform.any_op) -> !transform.any_op // expected-remark @below {{applying}} - transform.test_emit_remark_and_erase_operand %2, "applying" {fail_after_erase} + 
transform.test_emit_remark_and_erase_operand %2, "applying" {fail_after_erase} : !transform.any_op } } } @@ -218,7 +218,7 @@ } transform.with_pdl_patterns { -^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): pdl.pattern @match_call : benefit(1) { %0 = pdl.operands %1 = pdl.types @@ -226,25 +226,25 @@ pdl.rewrite %2 with "transform.dialect" } - transform.sequence %arg0 : !pdl.operation failures(propagate) { - ^bb1(%arg1: !pdl.operation): - %0 = pdl_match @match_call in %arg1 : (!pdl.operation) -> !pdl.operation - %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.alternatives %1 : !pdl.operation { - ^bb2(%arg2: !pdl.operation): - %2 = transform.pdl_match @match_call in %arg2 : (!pdl.operation) -> !pdl.operation + transform.sequence %arg0 : !transform.any_op failures(propagate) { + ^bb1(%arg1: !transform.any_op): + %0 = pdl_match @match_call in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = get_closest_isolated_parent %0 : (!transform.any_op) -> !transform.any_op + transform.alternatives %1 : !transform.any_op { + ^bb2(%arg2: !transform.any_op): + %2 = transform.pdl_match @match_call in %arg2 : (!transform.any_op) -> !transform.any_op // expected-remark @below {{applying}} - transform.test_emit_remark_and_erase_operand %2, "applying" {fail_after_erase} + transform.test_emit_remark_and_erase_operand %2, "applying" {fail_after_erase} : !transform.any_op }, { - ^bb2(%arg2: !pdl.operation): - %2 = transform.pdl_match @match_call in %arg2 : (!pdl.operation) -> !pdl.operation - transform.test_print_remark_at_operand %2, "still here" : !pdl.operation + ^bb2(%arg2: !transform.any_op): + %2 = transform.pdl_match @match_call in %arg2 : (!transform.any_op) -> !transform.any_op + transform.test_print_remark_at_operand %2, "still here" : !transform.any_op // This alternative succeeds. }, { - ^bb2(%arg2: !pdl.operation): + ^bb2(%arg2: !transform.any_op): // This alternative is never run, so we must not have a remark here. 
- %2 = transform.pdl_match @match_call in %arg2 : (!pdl.operation) -> !pdl.operation - transform.test_emit_remark_and_erase_operand %2, "should not happen" {fail_after_erase} + %2 = transform.pdl_match @match_call in %arg2 : (!transform.any_op) -> !transform.any_op + transform.test_emit_remark_and_erase_operand %2, "should not happen" {fail_after_erase} : !transform.any_op } } } @@ -259,7 +259,7 @@ } transform.with_pdl_patterns { -^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): pdl.pattern @match_call : benefit(1) { %0 = pdl.operands %1 = pdl.types @@ -267,20 +267,20 @@ pdl.rewrite %2 with "transform.dialect" } - transform.sequence %arg0 : !pdl.operation failures(propagate) { - ^bb1(%arg1: !pdl.operation): - %0 = pdl_match @match_call in %arg1 : (!pdl.operation) -> !pdl.operation - %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.alternatives %1 : !pdl.operation { - ^bb2(%arg2: !pdl.operation): - %2 = transform.pdl_match @match_call in %arg2 : (!pdl.operation) -> !pdl.operation + transform.sequence %arg0 : !transform.any_op failures(propagate) { + ^bb1(%arg1: !transform.any_op): + %0 = pdl_match @match_call in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = get_closest_isolated_parent %0 : (!transform.any_op) -> !transform.any_op + transform.alternatives %1 : !transform.any_op { + ^bb2(%arg2: !transform.any_op): + %2 = transform.pdl_match @match_call in %arg2 : (!transform.any_op) -> !transform.any_op // expected-remark @below {{applying}} - transform.test_emit_remark_and_erase_operand %2, "applying" {fail_after_erase} + transform.test_emit_remark_and_erase_operand %2, "applying" {fail_after_erase} : !transform.any_op }, { - ^bb2(%arg2: !pdl.operation): - %2 = transform.pdl_match @match_call in %arg2 : (!pdl.operation) -> !pdl.operation + ^bb2(%arg2: !transform.any_op): + %2 = transform.pdl_match @match_call in %arg2 : (!transform.any_op) -> !transform.any_op // expected-remark @below {{applying second 
time}} - transform.test_emit_remark_and_erase_operand %2, "applying second time" + transform.test_emit_remark_and_erase_operand %2, "applying second time" : !transform.any_op } } } @@ -295,7 +295,7 @@ } transform.with_pdl_patterns { -^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): pdl.pattern @match_call : benefit(1) { %0 = pdl.operands %1 = pdl.types @@ -303,27 +303,27 @@ pdl.rewrite %2 with "transform.dialect" } - transform.sequence %arg0 : !pdl.operation failures(propagate) { - ^bb1(%arg1: !pdl.operation): - %0 = pdl_match @match_call in %arg1 : (!pdl.operation) -> !pdl.operation - %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.alternatives %1 : !pdl.operation -> !pdl.operation { - ^bb2(%arg2: !pdl.operation): - %3 = transform.pdl_match @match_call in %arg2 : (!pdl.operation) -> !pdl.operation + transform.sequence %arg0 : !transform.any_op failures(propagate) { + ^bb1(%arg1: !transform.any_op): + %0 = pdl_match @match_call in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = get_closest_isolated_parent %0 : (!transform.any_op) -> !transform.any_op + %2 = transform.alternatives %1 : !transform.any_op -> !transform.any_op { + ^bb2(%arg2: !transform.any_op): + %3 = transform.pdl_match @match_call in %arg2 : (!transform.any_op) -> !transform.any_op // expected-remark @below {{applying}} - transform.test_emit_remark_and_erase_operand %3, "applying" {fail_after_erase} - %4 = transform.test_produce_self_handle_or_forward_operand %3 - transform.yield %4 : !pdl.operation + transform.test_emit_remark_and_erase_operand %3, "applying" {fail_after_erase} : !transform.any_op + %4 = transform.test_produce_self_handle_or_forward_operand %3 : (!transform.any_op) -> !transform.any_op + transform.yield %4 : !transform.any_op }, { - ^bb2(%arg2: !pdl.operation): - %4 = transform.test_produce_self_handle_or_forward_operand - transform.yield %4 : !pdl.operation + ^bb2(%arg2: !transform.any_op): + %4 = 
transform.test_produce_self_handle_or_forward_operand : () -> !transform.any_op + transform.yield %4 : !transform.any_op } // The first alternative failed, so the returned value is taken from the // second alternative, associated test_produce_self_handle_or_forward_operand rather // than pdl_match. // expected-remark @below {{succeeded}} - transform.test_consume_operand_of_op_kind_or_fail %2, "transform.test_produce_self_handle_or_forward_operand" + transform.test_consume_operand_of_op_kind_or_fail %2, "transform.test_produce_self_handle_or_forward_operand" : !transform.any_op } } @@ -343,16 +343,16 @@ } transform.sequence failures(propagate) { - ^bb1(%arg1: !pdl.operation): + ^bb1(%arg1: !transform.any_op): // expected-error @below {{scope must not contain the transforms being applied}} - transform.alternatives %arg1 : !pdl.operation { - ^bb2(%arg2: !pdl.operation): - %0 = transform.test_produce_self_handle_or_forward_operand - transform.test_consume_operand_of_op_kind_or_fail %0, "transform.sequence" + transform.alternatives %arg1 : !transform.any_op { + ^bb2(%arg2: !transform.any_op): + %0 = transform.test_produce_self_handle_or_forward_operand : () -> !transform.any_op + transform.test_consume_operand_of_op_kind_or_fail %0, "transform.sequence" : !transform.any_op }, { - ^bb2(%arg2: !pdl.operation): - %0 = transform.test_produce_self_handle_or_forward_operand - transform.test_consume_operand_of_op_kind_or_fail %0, "transform.test_produce_self_handle_or_forward_operand" + ^bb2(%arg2: !transform.any_op): + %0 = transform.test_produce_self_handle_or_forward_operand : () -> !transform.any_op + transform.test_consume_operand_of_op_kind_or_fail %0, "transform.test_produce_self_handle_or_forward_operand" : !transform.any_op } } } @@ -368,7 +368,7 @@ } transform.with_pdl_patterns { -^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): pdl.pattern @match_const : benefit(1) { %0 = pdl.operands %1 = pdl.types @@ -377,13 +377,13 @@ } - sequence %arg0 : 
!pdl.operation failures(propagate) { - ^bb1(%arg1: !pdl.operation): - %0 = transform.pdl_match @match_const in %arg1 : (!pdl.operation) -> !pdl.operation - %1 = transform.loop.get_parent_for %0 : (!pdl.operation) -> !pdl.operation + sequence %arg0 : !transform.any_op failures(propagate) { + ^bb1(%arg1: !transform.any_op): + %0 = transform.pdl_match @match_const in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.loop.get_parent_for %0 : (!transform.any_op) -> !transform.any_op // expected-error @below {{only isolated-from-above ops can be alternative scopes}} - alternatives %1 : !pdl.operation { - ^bb2(%arg2: !pdl.operation): + alternatives %1 : !transform.any_op { + ^bb2(%arg2: !transform.any_op): } } } @@ -396,7 +396,7 @@ } transform.with_pdl_patterns { -^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): pdl.pattern @some : benefit(1) { %0 = pdl.operands %1 = pdl.types @@ -404,12 +404,12 @@ pdl.rewrite %2 with "transform.dialect" } - transform.sequence %arg0 : !pdl.operation failures(propagate) { - ^bb0(%arg1: !pdl.operation): - %0 = pdl_match @some in %arg1 : (!pdl.operation) -> !pdl.operation + transform.sequence %arg0 : !transform.any_op failures(propagate) { + ^bb0(%arg1: !transform.any_op): + %0 = pdl_match @some in %arg1 : (!transform.any_op) -> !transform.any_op // expected-error @below {{application of transform.test_wrong_number_of_results expected to produce 3 results (actually produced 1).}} // expected-note @below {{if you need variadic results, consider a generic `apply` instead of the specialized `applyToOne`.}} - transform.test_wrong_number_of_results %0 + transform.test_wrong_number_of_results %0 : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) } } @@ -423,7 +423,7 @@ } transform.with_pdl_patterns { -^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): pdl.pattern @some : benefit(1) { %0 = pdl.operands %1 = pdl.types @@ -431,12 +431,12 @@ pdl.rewrite %2 with "transform.dialect" 
} - transform.sequence %arg0 : !pdl.operation failures(propagate) { - ^bb0(%arg1: !pdl.operation): - %0 = pdl_match @some in %arg1 : (!pdl.operation) -> !pdl.operation + transform.sequence %arg0 : !transform.any_op failures(propagate) { + ^bb0(%arg1: !transform.any_op): + %0 = pdl_match @some in %arg1 : (!transform.any_op) -> !transform.any_op // expected-error @below {{application of transform.test_wrong_number_of_multi_results expected to produce 1 results (actually produced 0)}} // expected-note @below {{if you need variadic results, consider a generic `apply` instead of the specialized `applyToOne`.}} - transform.test_wrong_number_of_multi_results %0 + transform.test_wrong_number_of_multi_results %0 : (!transform.any_op) -> (!transform.any_op) } } @@ -450,7 +450,7 @@ } transform.with_pdl_patterns { -^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): pdl.pattern @some : benefit(1) { %0 = pdl.operands %1 = pdl.types @@ -458,11 +458,11 @@ pdl.rewrite %2 with "transform.dialect" } - transform.sequence %arg0 : !pdl.operation failures(propagate) { - ^bb0(%arg1: !pdl.operation): - %0 = pdl_match @some in %arg1 : (!pdl.operation) -> !pdl.operation + transform.sequence %arg0 : !transform.any_op failures(propagate) { + ^bb0(%arg1: !transform.any_op): + %0 = pdl_match @some in %arg1 : (!transform.any_op) -> !transform.any_op // Transform matches 3 ops and produces 2 results. 
- %1:2 = transform.test_correct_number_of_multi_results %0 + %1:2 = transform.test_correct_number_of_multi_results %0 : (!transform.any_op) -> (!transform.any_op, !transform.any_op) } } @@ -474,7 +474,7 @@ } transform.with_pdl_patterns { -^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): pdl.pattern @some : benefit(1) { %0 = pdl.operands %1 = pdl.types @@ -482,11 +482,11 @@ pdl.rewrite %2 with "transform.dialect" } - transform.sequence %arg0 : !pdl.operation failures(propagate) { - ^bb0(%arg1: !pdl.operation): - %0 = pdl_match @some in %arg1 : (!pdl.operation) -> !pdl.operation + transform.sequence %arg0 : !transform.any_op failures(propagate) { + ^bb0(%arg1: !transform.any_op): + %0 = pdl_match @some in %arg1 : (!transform.any_op) -> !transform.any_op // Transform fails to match any but still produces 2 results. - %1:2 = transform.test_correct_number_of_multi_results %0 + %1:2 = transform.test_correct_number_of_multi_results %0 : (!transform.any_op) -> (!transform.any_op, !transform.any_op) } } @@ -500,7 +500,7 @@ } transform.with_pdl_patterns { -^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): pdl.pattern @some : benefit(1) { %0 = pdl.operands %1 = pdl.types @@ -508,10 +508,10 @@ pdl.rewrite %2 with "transform.dialect" } - transform.sequence %arg0 : !pdl.operation failures(propagate) { - ^bb0(%arg1: !pdl.operation): - %0 = pdl_match @some in %arg1 : (!pdl.operation) -> !pdl.operation - transform.test_mixed_null_and_non_null_results %0 + transform.sequence %arg0 : !transform.any_op failures(propagate) { + ^bb0(%arg1: !transform.any_op): + %0 = pdl_match @some in %arg1 : (!transform.any_op) -> !transform.any_op + transform.test_mixed_null_and_non_null_results %0 : (!transform.any_op) -> (!transform.any_op, !transform.any_op) } } @@ -530,7 +530,7 @@ } transform.with_pdl_patterns { -^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): pdl.pattern @addi : benefit(1) { %0 = pdl.operands %1 = pdl.types @@ -544,12 +544,12 @@ pdl.rewrite 
%2 with "transform.dialect" } - transform.sequence %arg0 : !pdl.operation failures(propagate) { - ^bb0(%arg1: !pdl.operation): - %0 = pdl_match @addi in %arg1 : (!pdl.operation) -> !pdl.operation - %1 = pdl_match @subi in %arg1 : (!pdl.operation) -> !pdl.operation - %2 = merge_handles %0, %1 : !pdl.operation - test_print_remark_at_operand %2, "matched" : !pdl.operation + transform.sequence %arg0 : !transform.any_op failures(propagate) { + ^bb0(%arg1: !transform.any_op): + %0 = pdl_match @addi in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = pdl_match @subi in %arg1 : (!transform.any_op) -> !transform.any_op + %2 = merge_handles %0, %1 : !transform.any_op + test_print_remark_at_operand %2, "matched" : !transform.any_op } } @@ -563,7 +563,7 @@ } transform.with_pdl_patterns { -^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): pdl.pattern @some : benefit(1) { %0 = pdl.operands %1 = pdl.types @@ -571,11 +571,11 @@ pdl.rewrite %2 with "transform.dialect" } - transform.sequence %arg0 : !pdl.operation failures(propagate) { - ^bb0(%arg1: !pdl.operation): - %0 = pdl_match @some in %arg1 : (!pdl.operation) -> !pdl.operation + transform.sequence %arg0 : !transform.any_op failures(propagate) { + ^bb0(%arg1: !transform.any_op): + %0 = pdl_match @some in %arg1 : (!transform.any_op) -> !transform.any_op // expected-error @below {{failed to apply}} - transform.test_mixed_sucess_and_silenceable %0 + transform.test_mixed_success_and_silenceable %0 : !transform.any_op } } @@ -587,7 +587,7 @@ } transform.with_pdl_patterns { -^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): pdl.pattern @some : benefit(1) { %0 = pdl.operands %1 = pdl.types @@ -595,12 +595,12 @@ pdl.rewrite %2 with "transform.dialect" } - transform.sequence %arg0 : !pdl.operation failures(suppress) { - ^bb0(%arg1: !pdl.operation): - %0 = pdl_match @some in %arg1 : (!pdl.operation) -> !pdl.operation + transform.sequence %arg0 : !transform.any_op failures(suppress) { + ^bb0(%arg1: 
!transform.any_op): + %0 = pdl_match @some in %arg1 : (!transform.any_op) -> !transform.any_op // Not expecting error here because we are suppressing it. // expected-remark @below {{foo}} - test_emit_remark_and_erase_operand %0, "foo" {fail_after_erase} + test_emit_remark_and_erase_operand %0, "foo" {fail_after_erase} : !transform.any_op } } @@ -612,7 +612,7 @@ } transform.with_pdl_patterns { -^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): pdl.pattern @some : benefit(1) { %0 = pdl.operands %1 = pdl.types @@ -620,12 +620,12 @@ pdl.rewrite %2 with "transform.dialect" } - transform.sequence %arg0 : !pdl.operation failures(propagate) { - ^bb0(%arg1: !pdl.operation): - %0 = pdl_match @some in %arg1 : (!pdl.operation) -> !pdl.operation + transform.sequence %arg0 : !transform.any_op failures(propagate) { + ^bb0(%arg1: !transform.any_op): + %0 = pdl_match @some in %arg1 : (!transform.any_op) -> !transform.any_op // expected-error @below {{silenceable error}} // expected-remark @below {{foo}} - test_emit_remark_and_erase_operand %0, "foo" {fail_after_erase} + test_emit_remark_and_erase_operand %0, "foo" {fail_after_erase} : !transform.any_op } } @@ -747,10 +747,10 @@ } transform.sequence failures(propagate) { -^bb1(%arg1: !pdl.operation): - %addi = transform.structured.match ops{["arith.addi"]} in %arg1 : (!pdl.operation) -> !pdl.operation - %muli = get_producer_of_operand %addi[0] : (!pdl.operation) -> !pdl.operation - transform.test_print_remark_at_operand %muli, "found muli" : !pdl.operation +^bb1(%arg1: !transform.any_op): + %addi = transform.structured.match ops{["arith.addi"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %muli = get_producer_of_operand %addi[0] : (!transform.any_op) -> !transform.any_op + transform.test_print_remark_at_operand %muli, "found muli" : !transform.any_op } // ----- @@ -762,10 +762,10 @@ } transform.sequence failures(propagate) { -^bb1(%arg1: !pdl.operation): - %muli = transform.structured.match ops{["arith.muli"]} 
in %arg1 : (!pdl.operation) -> !pdl.operation +^bb1(%arg1: !transform.any_op): + %muli = transform.structured.match ops{["arith.muli"]} in %arg1 : (!transform.any_op) -> !transform.any_op // expected-error @below {{could not find a producer for operand number: 0 of}} - %bbarg = get_producer_of_operand %muli[0] : (!pdl.operation) -> !pdl.operation + %bbarg = get_producer_of_operand %muli[0] : (!transform.any_op) -> !transform.any_op } @@ -779,10 +779,10 @@ } transform.sequence failures(propagate) { -^bb1(%arg1: !pdl.operation): - %muli = transform.structured.match ops{["arith.muli"]} in %arg1 : (!pdl.operation) -> !pdl.operation - %addi = get_consumers_of_result %muli[0] : (!pdl.operation) -> !pdl.operation - transform.test_print_remark_at_operand %addi, "found addi" : !pdl.operation +^bb1(%arg1: !transform.any_op): + %muli = transform.structured.match ops{["arith.muli"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %addi = get_consumers_of_result %muli[0] : (!transform.any_op) -> !transform.any_op + transform.test_print_remark_at_operand %addi, "found addi" : !transform.any_op } // ----- @@ -794,10 +794,10 @@ } transform.sequence failures(propagate) { -^bb1(%arg1: !pdl.operation): - %muli = transform.structured.match ops{["arith.muli"]} in %arg1 : (!pdl.operation) -> !pdl.operation +^bb1(%arg1: !transform.any_op): + %muli = transform.structured.match ops{["arith.muli"]} in %arg1 : (!transform.any_op) -> !transform.any_op // expected-error @below {{handle must be mapped to exactly one payload op}} - %bbarg = get_consumers_of_result %muli[0] : (!pdl.operation) -> !pdl.operation + %bbarg = get_consumers_of_result %muli[0] : (!transform.any_op) -> !transform.any_op } @@ -809,10 +809,10 @@ } transform.sequence failures(propagate) { -^bb1(%arg1: !pdl.operation): - %muli = transform.structured.match ops{["arith.muli"]} in %arg1 : (!pdl.operation) -> !pdl.operation +^bb1(%arg1: !transform.any_op): + %muli = transform.structured.match ops{["arith.muli"]} in %arg1 : 
(!transform.any_op) -> !transform.any_op // expected-error @below {{result number overflow}} - %bbarg = get_consumers_of_result %muli[1] : (!pdl.operation) -> !pdl.operation + %bbarg = get_consumers_of_result %muli[1] : (!transform.any_op) -> !transform.any_op } @@ -923,18 +923,18 @@ "other_dialect.other_op"() : () -> () transform.with_pdl_patterns { -^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): pdl.pattern @other : benefit(1) { %0 = pdl.operation "other_dialect.other_op" pdl.rewrite %0 with "transform.dialect" } - sequence %arg0 : !pdl.operation failures(propagate) { - ^bb1(%arg1: !pdl.operation): - %0 = pdl_match @other in %arg1 : (!pdl.operation) -> !pdl.operation + sequence %arg0 : !transform.any_op failures(propagate) { + ^bb1(%arg1: !transform.any_op): + %0 = pdl_match @other in %arg1 : (!transform.any_op) -> !transform.any_op // expected-error @below {{expected the payload operation to belong to the 'test' dialect}} - %2 = transform.cast %0 : !pdl.operation to !transform.test_dialect_op - transform.cast %2 : !transform.test_dialect_op to !pdl.operation + %2 = transform.cast %0 : !transform.any_op to !transform.test_dialect_op + transform.cast %2 : !transform.test_dialect_op to !transform.any_op } } @@ -944,17 +944,17 @@ "other_dialect.other_op"() : () -> () transform.with_pdl_patterns { -^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): pdl.pattern @some : benefit(1) { %0 = pdl.operation "test.some_op" pdl.rewrite %0 with "transform.dialect" } - sequence %arg0 : !pdl.operation failures(propagate) { - ^bb1(%arg1: !pdl.operation): - %0 = pdl_match @some in %arg1 : (!pdl.operation) -> !pdl.operation - %2 = transform.cast %0 : !pdl.operation to !transform.op<"test.some_op"> - transform.cast %2 : !transform.op<"test.some_op"> to !pdl.operation + sequence %arg0 : !transform.any_op failures(propagate) { + ^bb1(%arg1: !transform.any_op): + %0 = pdl_match @some in %arg1 : (!transform.any_op) -> !transform.any_op + %2 = transform.cast %0 : 
!transform.any_op to !transform.op<"test.some_op"> + transform.cast %2 : !transform.op<"test.some_op"> to !transform.any_op } } @@ -965,36 +965,36 @@ "other_dialect.other_op"() : () -> () transform.with_pdl_patterns { -^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): pdl.pattern @other : benefit(1) { %0 = pdl.operation "other_dialect.other_op" pdl.rewrite %0 with "transform.dialect" } - sequence %arg0 : !pdl.operation failures(propagate) { - ^bb1(%arg1: !pdl.operation): - %0 = pdl_match @other in %arg1 : (!pdl.operation) -> !pdl.operation + sequence %arg0 : !transform.any_op failures(propagate) { + ^bb1(%arg1: !transform.any_op): + %0 = pdl_match @other in %arg1 : (!transform.any_op) -> !transform.any_op // expected-error @below {{incompatible payload operation name}} - %2 = transform.cast %0 : !pdl.operation to !transform.op<"test.some_op"> - transform.cast %2 : !transform.op<"test.some_op"> to !pdl.operation + %2 = transform.cast %0 : !transform.any_op to !transform.op<"test.some_op"> + transform.cast %2 : !transform.op<"test.some_op"> to !transform.any_op } } // ----- transform.with_pdl_patterns { -^bb0(%arg0: !pdl.operation): - transform.sequence %arg0 : !pdl.operation failures(propagate) { - ^bb0(%arg1: !pdl.operation): - %0 = pdl_match @some in %arg1 : (!pdl.operation) -> !pdl.operation +^bb0(%arg0: !transform.any_op): + transform.sequence %arg0 : !transform.any_op failures(propagate) { + ^bb0(%arg1: !transform.any_op): + %0 = pdl_match @some in %arg1 : (!transform.any_op) -> !transform.any_op // here, the handles nested under are {%arg0, %arg1, %0} // expected-remark @below {{3 handles nested under}} - transform.test_report_number_of_tracked_handles_nested_under %arg1 + transform.test_report_number_of_tracked_handles_nested_under %arg1 : !transform.any_op // expected-remark @below {{erased}} - transform.test_emit_remark_and_erase_operand %0, "erased" + transform.test_emit_remark_and_erase_operand %0, "erased" : !transform.any_op // here, the 
handles nested under are only {%arg0, %arg1} // expected-remark @below {{2 handles nested under}} - transform.test_report_number_of_tracked_handles_nested_under %arg1 + transform.test_report_number_of_tracked_handles_nested_under %arg1 : !transform.any_op } pdl.pattern @some : benefit(1) { @@ -1065,9 +1065,9 @@ // ----- transform.sequence failures(propagate) { -^bb0(%arg0: !pdl.operation): - %0 = transform.structured.match ops{["func.func"]} in %arg0 : (!pdl.operation) -> !pdl.operation - %1 = transform.test_produce_param_with_number_of_test_ops %0 : !pdl.operation +^bb0(%arg0: !transform.any_op): + %0 = transform.structured.match ops{["func.func"]} in %arg0 : (!transform.any_op) -> !transform.any_op + %1 = transform.test_produce_param_with_number_of_test_ops %0 : !transform.any_op // expected-remark @below {{1 : i32, 3 : i32}} transform.test_print_param %1 : !transform.test_dialect_param %2 = transform.test_add_to_param %1, 100 diff --git a/mlir/test/Dialect/Transform/transform-state-extension.mlir b/mlir/test/Dialect/Transform/transform-state-extension.mlir --- a/mlir/test/Dialect/Transform/transform-state-extension.mlir +++ b/mlir/test/Dialect/Transform/transform-state-extension.mlir @@ -3,15 +3,15 @@ // expected-note @below {{associated payload op}} module { transform.sequence failures(propagate) { - ^bb0(%arg0: !pdl.operation): + ^bb0(%arg0: !transform.any_op): // expected-remark @below {{extension absent}} - test_check_if_test_extension_present %arg0 + test_check_if_test_extension_present %arg0 : !transform.any_op test_add_test_extension "A" // expected-remark @below {{extension present, A}} - test_check_if_test_extension_present %arg0 + test_check_if_test_extension_present %arg0 : !transform.any_op test_remove_test_extension // expected-remark @below {{extension absent}} - test_check_if_test_extension_present %arg0 + test_check_if_test_extension_present %arg0 : !transform.any_op } } @@ -20,12 +20,12 @@ // expected-note @below {{associated payload op}} module 
{ transform.sequence failures(propagate) { - ^bb0(%arg0: !pdl.operation): + ^bb0(%arg0: !transform.any_op): test_add_test_extension "A" test_remove_test_extension test_add_test_extension "B" // expected-remark @below {{extension present, B}} - test_check_if_test_extension_present %arg0 + test_check_if_test_extension_present %arg0 : !transform.any_op } } @@ -34,56 +34,56 @@ // expected-note @below {{associated payload op}} module { transform.sequence failures(propagate) { - ^bb0(%arg0: !pdl.operation): + ^bb0(%arg0: !transform.any_op): test_add_test_extension "A" // expected-remark @below {{extension present, A}} - test_check_if_test_extension_present %arg0 + test_check_if_test_extension_present %arg0 : !transform.any_op // expected-note @below {{associated payload op}} - test_remap_operand_to_self %arg0 + test_remap_operand_to_self %arg0 : (!transform.any_op) -> !transform.any_op // expected-remark @below {{extension present, A}} - test_check_if_test_extension_present %arg0 + test_check_if_test_extension_present %arg0 : !transform.any_op } } // ----- transform.sequence failures(propagate) { -^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): test_add_test_extension "A" // This is okay because we are replacing the top-level module operation // (0 results) with this operation that has _more_ (1) results. - %dummy = test_remap_operand_to_self %arg0 : !pdl.operation + %dummy = test_remap_operand_to_self %arg0 : (!transform.any_op) -> !transform.any_op } // ----- transform.sequence failures(propagate) { -^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): test_add_test_extension "A" - %dummy = test_remap_operand_to_self %arg0 : !pdl.operation + %dummy = test_remap_operand_to_self %arg0 : (!transform.any_op) -> !transform.any_op // This is still okay. Even though we are replacing the previous // operation with (1 result) with this operation that has less (0) results, // there is no handle to the result, hence no issue with value handle update. 
- test_remap_operand_to_self %dummy + test_remap_operand_to_self %dummy : (!transform.any_op) -> !transform.any_op } // ----- transform.sequence failures(propagate) { -^bb0(%arg0: !pdl.operation): +^bb0(%arg0: !transform.any_op): test_add_test_extension "A" // expected-error @below {{cannot replace an op with another op producing fewer results while tracking handles}} - %dummy = test_remap_operand_to_self %arg0 : !pdl.operation - %valuehandle = transform.get_result %dummy[0] : (!pdl.operation) -> !transform.any_value - test_remap_operand_to_self %dummy + %dummy = test_remap_operand_to_self %arg0 : (!transform.any_op) -> !transform.any_op + %valuehandle = transform.get_result %dummy[0] : (!transform.any_op) -> !transform.any_value + test_remap_operand_to_self %dummy : (!transform.any_op) -> () } // ----- module { transform.sequence failures(suppress) { - ^bb0(%arg0: !pdl.operation): + ^bb0(%arg0: !transform.any_op): // expected-error @below {{TestTransformStateExtension missing}} - test_remap_operand_to_self %arg0 + test_remap_operand_to_self %arg0 : (!transform.any_op) -> !transform.any_op } } diff --git a/mlir/test/Dialect/Vector/vector-broadcast-lowering-transforms.mlir b/mlir/test/Dialect/Vector/vector-broadcast-lowering-transforms.mlir --- a/mlir/test/Dialect/Vector/vector-broadcast-lowering-transforms.mlir +++ b/mlir/test/Dialect/Vector/vector-broadcast-lowering-transforms.mlir @@ -163,10 +163,10 @@ } transform.sequence failures(propagate) { -^bb1(%module_op: !pdl.operation): +^bb1(%module_op: !transform.any_op): %f = transform.structured.match ops{["func.func"]} in %module_op - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op transform.vector.lower_broadcast %f - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op } diff --git a/mlir/test/Dialect/Vector/vector-contract-matvec-transforms.mlir b/mlir/test/Dialect/Vector/vector-contract-matvec-transforms.mlir --- 
a/mlir/test/Dialect/Vector/vector-contract-matvec-transforms.mlir +++ b/mlir/test/Dialect/Vector/vector-contract-matvec-transforms.mlir @@ -209,8 +209,8 @@ } transform.sequence failures(propagate) { -^bb1(%module_op: !pdl.operation): +^bb1(%module_op: !transform.any_op): transform.vector.lower_contraction %module_op lowering_strategy = "outerproduct" - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op } diff --git a/mlir/test/Dialect/Vector/vector-contract-to-dot-transforms.mlir b/mlir/test/Dialect/Vector/vector-contract-to-dot-transforms.mlir --- a/mlir/test/Dialect/Vector/vector-contract-to-dot-transforms.mlir +++ b/mlir/test/Dialect/Vector/vector-contract-to-dot-transforms.mlir @@ -296,11 +296,11 @@ } transform.sequence failures(propagate) { -^bb1(%module_op: !pdl.operation): +^bb1(%module_op: !transform.any_op): %f = transform.structured.match ops{["func.func"]} in %module_op - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op %f2 = transform.vector.lower_contraction %f lowering_strategy = "dot" - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op } diff --git a/mlir/test/Dialect/Vector/vector-contract-to-matrix-intrinsics-transforms.mlir b/mlir/test/Dialect/Vector/vector-contract-to-matrix-intrinsics-transforms.mlir --- a/mlir/test/Dialect/Vector/vector-contract-to-matrix-intrinsics-transforms.mlir +++ b/mlir/test/Dialect/Vector/vector-contract-to-matrix-intrinsics-transforms.mlir @@ -44,14 +44,14 @@ } transform.sequence failures(propagate) { -^bb1(%module_op: !pdl.operation): +^bb1(%module_op: !transform.any_op): %f = transform.structured.match ops{["func.func"]} in %module_op - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op %f2 = transform.vector.lower_contraction %f lowering_strategy = "matmulintrinsics" - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op %f3 = transform.vector.lower_shape_cast %f2 
- : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op } diff --git a/mlir/test/Dialect/Vector/vector-contract-to-outerproduct-transforms.mlir b/mlir/test/Dialect/Vector/vector-contract-to-outerproduct-transforms.mlir --- a/mlir/test/Dialect/Vector/vector-contract-to-outerproduct-transforms.mlir +++ b/mlir/test/Dialect/Vector/vector-contract-to-outerproduct-transforms.mlir @@ -343,11 +343,11 @@ transform.sequence failures(propagate) { -^bb1(%module_op: !pdl.operation): +^bb1(%module_op: !transform.any_op): %f = transform.structured.match ops{["func.func"]} in %module_op - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op %f2 = transform.vector.lower_contraction %f lowering_strategy = "outerproduct" - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op } diff --git a/mlir/test/Dialect/Vector/vector-contract-to-parallel-arith-transforms.mlir b/mlir/test/Dialect/Vector/vector-contract-to-parallel-arith-transforms.mlir --- a/mlir/test/Dialect/Vector/vector-contract-to-parallel-arith-transforms.mlir +++ b/mlir/test/Dialect/Vector/vector-contract-to-parallel-arith-transforms.mlir @@ -52,11 +52,11 @@ } transform.sequence failures(propagate) { -^bb1(%module_op: !pdl.operation): +^bb1(%module_op: !transform.any_op): %f = transform.structured.match ops{["func.func"]} in %module_op - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op %f2 = transform.vector.lower_contraction %f lowering_strategy = "parallelarith" - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op } diff --git a/mlir/test/Dialect/Vector/vector-mask-lowering-transforms.mlir b/mlir/test/Dialect/Vector/vector-mask-lowering-transforms.mlir --- a/mlir/test/Dialect/Vector/vector-mask-lowering-transforms.mlir +++ b/mlir/test/Dialect/Vector/vector-mask-lowering-transforms.mlir @@ -92,12 +92,12 @@ } transform.sequence failures(propagate) { -^bb1(%module_op: 
!pdl.operation): +^bb1(%module_op: !transform.any_op): %f = transform.structured.match ops{["func.func"]} in %module_op - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op transform.vector.lower_masks %f - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op } // ----- @@ -122,10 +122,10 @@ } transform.sequence failures(propagate) { -^bb1(%module_op: !pdl.operation): +^bb1(%module_op: !transform.any_op): %f = transform.structured.match ops{["func.func"]} in %module_op - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op transform.vector.lower_masked_transfers %f - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op } diff --git a/mlir/test/Dialect/Vector/vector-multi-reduction-lowering.mlir b/mlir/test/Dialect/Vector/vector-multi-reduction-lowering.mlir --- a/mlir/test/Dialect/Vector/vector-multi-reduction-lowering.mlir +++ b/mlir/test/Dialect/Vector/vector-multi-reduction-lowering.mlir @@ -266,8 +266,8 @@ // CHECK: vector.transpose %[[INPUT]], [1, 0, 2] : vector<3x4x5xf32> to vector<4x3x5xf32> transform.sequence failures(propagate) { -^bb1(%module_op: !pdl.operation): +^bb1(%module_op: !transform.any_op): transform.vector.lower_multi_reduction %module_op lowering_strategy = "innerreduction" - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op } diff --git a/mlir/test/Dialect/Vector/vector-multi-reduction-outer-lowering.mlir b/mlir/test/Dialect/Vector/vector-multi-reduction-outer-lowering.mlir --- a/mlir/test/Dialect/Vector/vector-multi-reduction-outer-lowering.mlir +++ b/mlir/test/Dialect/Vector/vector-multi-reduction-outer-lowering.mlir @@ -189,8 +189,8 @@ // CHECK: return %{{.+}} transform.sequence failures(propagate) { -^bb1(%module_op: !pdl.operation): +^bb1(%module_op: !transform.any_op): transform.vector.lower_multi_reduction %module_op lowering_strategy = "innerparallel" - : (!pdl.operation) -> 
!pdl.operation + : (!transform.any_op) -> !transform.any_op } diff --git a/mlir/test/Dialect/Vector/vector-outerproduct-lowering-transforms.mlir b/mlir/test/Dialect/Vector/vector-outerproduct-lowering-transforms.mlir --- a/mlir/test/Dialect/Vector/vector-outerproduct-lowering-transforms.mlir +++ b/mlir/test/Dialect/Vector/vector-outerproduct-lowering-transforms.mlir @@ -136,13 +136,13 @@ } transform.sequence failures(propagate) { -^bb1(%module_op: !pdl.operation): +^bb1(%module_op: !transform.any_op): %f = transform.structured.match ops{["func.func"]} in %module_op - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op %f2 = transform.vector.lower_outerproduct %f - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op %f3 = transform.vector.lower_broadcast %f2 - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op } diff --git a/mlir/test/Dialect/Vector/vector-shape-cast-lowering-transforms.mlir b/mlir/test/Dialect/Vector/vector-shape-cast-lowering-transforms.mlir --- a/mlir/test/Dialect/Vector/vector-shape-cast-lowering-transforms.mlir +++ b/mlir/test/Dialect/Vector/vector-shape-cast-lowering-transforms.mlir @@ -125,10 +125,10 @@ } transform.sequence failures(propagate) { -^bb1(%module_op: !pdl.operation): +^bb1(%module_op: !transform.any_op): %f = transform.structured.match ops{["func.func"]} in %module_op - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op %f2 = transform.vector.lower_shape_cast %f - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op } diff --git a/mlir/test/Dialect/Vector/vector-transfer-drop-unit-dims-patterns.mlir b/mlir/test/Dialect/Vector/vector-transfer-drop-unit-dims-patterns.mlir --- a/mlir/test/Dialect/Vector/vector-transfer-drop-unit-dims-patterns.mlir +++ b/mlir/test/Dialect/Vector/vector-transfer-drop-unit-dims-patterns.mlir @@ -30,7 +30,7 @@ transform.sequence failures(propagate) { 
-^bb1(%module_op: !pdl.operation): +^bb1(%module_op: !transform.any_op): transform.vector.apply_rank_reducing_subview_patterns %module_op - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op } diff --git a/mlir/test/Dialect/Vector/vector-transfer-full-partial-split-copy-transform.mlir b/mlir/test/Dialect/Vector/vector-transfer-full-partial-split-copy-transform.mlir --- a/mlir/test/Dialect/Vector/vector-transfer-full-partial-split-copy-transform.mlir +++ b/mlir/test/Dialect/Vector/vector-transfer-full-partial-split-copy-transform.mlir @@ -107,10 +107,10 @@ } transform.sequence failures(propagate) { -^bb1(%module_op: !pdl.operation): +^bb1(%module_op: !transform.any_op): transform.vector.split_transfer_full_partial %module_op split_transfer_strategy = "linalg-copy" - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op } // ----- @@ -168,10 +168,10 @@ // CHECK: } transform.sequence failures(propagate) { -^bb1(%module_op: !pdl.operation): +^bb1(%module_op: !transform.any_op): transform.vector.split_transfer_full_partial %module_op split_transfer_strategy = "linalg-copy" - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op } // ----- @@ -236,8 +236,8 @@ // CHECK: } transform.sequence failures(propagate) { -^bb1(%module_op: !pdl.operation): +^bb1(%module_op: !transform.any_op): transform.vector.split_transfer_full_partial %module_op split_transfer_strategy = "linalg-copy" - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op } diff --git a/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir b/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir --- a/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir +++ b/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir @@ -102,10 +102,10 @@ } transform.sequence failures(propagate) { -^bb1(%module_op: !pdl.operation): +^bb1(%module_op: !transform.any_op): 
transform.vector.split_transfer_full_partial %module_op split_transfer_strategy = "vector-transfer" - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op } // ----- @@ -160,10 +160,10 @@ transform.sequence failures(propagate) { -^bb1(%module_op: !pdl.operation): +^bb1(%module_op: !transform.any_op): transform.vector.split_transfer_full_partial %module_op split_transfer_strategy = "vector-transfer" - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op } // ----- @@ -222,10 +222,10 @@ // CHECK: } transform.sequence failures(propagate) { -^bb1(%module_op: !pdl.operation): +^bb1(%module_op: !transform.any_op): transform.vector.split_transfer_full_partial %module_op split_transfer_strategy = "vector-transfer" - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op } // ----- @@ -264,8 +264,8 @@ } transform.sequence failures(propagate) { -^bb1(%module_op: !pdl.operation): +^bb1(%module_op: !transform.any_op): transform.vector.split_transfer_full_partial %module_op split_transfer_strategy = "vector-transfer" - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op } diff --git a/mlir/test/Dialect/Vector/vector-transfer-to-vector-load-store.mlir b/mlir/test/Dialect/Vector/vector-transfer-to-vector-load-store.mlir --- a/mlir/test/Dialect/Vector/vector-transfer-to-vector-load-store.mlir +++ b/mlir/test/Dialect/Vector/vector-transfer-to-vector-load-store.mlir @@ -239,12 +239,12 @@ transform.sequence failures(propagate) { -^bb1(%module_op: !pdl.operation): +^bb1(%module_op: !transform.any_op): %m2 = transform.vector.lower_transfer %module_op max_transfer_rank = 99 - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op transform.vector.apply_transfer_permutation_patterns %m2 - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op } // ----- @@ -362,10 +362,10 @@ } transform.sequence failures(propagate) { 
-^bb1(%module_op: !pdl.operation): +^bb1(%module_op: !transform.any_op): %m2 = transform.vector.lower_transfer %module_op max_transfer_rank = 99 - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op transform.vector.apply_transfer_permutation_patterns %m2 - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op } diff --git a/mlir/test/Dialect/Vector/vector-transpose-lowering.mlir b/mlir/test/Dialect/Vector/vector-transpose-lowering.mlir --- a/mlir/test/Dialect/Vector/vector-transpose-lowering.mlir +++ b/mlir/test/Dialect/Vector/vector-transpose-lowering.mlir @@ -75,10 +75,10 @@ } transform.sequence failures(propagate) { -^bb1(%module_op: !pdl.operation): +^bb1(%module_op: !transform.any_op): transform.vector.lower_transpose %module_op lowering_strategy = "eltwise" - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op } // ----- @@ -98,10 +98,10 @@ transform.sequence failures(propagate) { -^bb1(%module_op: !pdl.operation): +^bb1(%module_op: !transform.any_op): transform.vector.lower_transpose %module_op lowering_strategy = "shuffle_1d" - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op } // ----- @@ -117,10 +117,10 @@ transform.sequence failures(propagate) { -^bb1(%module_op: !pdl.operation): +^bb1(%module_op: !transform.any_op): transform.vector.lower_transpose %module_op lowering_strategy = "flat_transpose" - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op } // ----- @@ -604,10 +604,10 @@ } transform.sequence failures(propagate) { -^bb1(%module_op: !pdl.operation): +^bb1(%module_op: !transform.any_op): transform.vector.lower_transpose %module_op avx2_lowering_strategy = true - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op } // ----- @@ -682,10 +682,10 @@ } transform.sequence failures(propagate) { -^bb1(%module_op: !pdl.operation): +^bb1(%module_op: !transform.any_op): 
transform.vector.lower_transpose %module_op lowering_strategy = "shuffle_16x16" - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op } // ----- @@ -761,8 +761,8 @@ } transform.sequence failures(propagate) { -^bb1(%module_op: !pdl.operation): +^bb1(%module_op: !transform.any_op): transform.vector.lower_transpose %module_op lowering_strategy = "shuffle_16x16" - : (!pdl.operation) -> !pdl.operation + : (!transform.any_op) -> !transform.any_op } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir @@ -25,9 +25,9 @@ } transform.sequence failures(propagate) { - ^bb0(%arg1: !pdl.operation): - %0 = transform.structured.match ops{["linalg.conv_1d"]} in %arg1 : (!pdl.operation) -> !pdl.operation - %1, %loop = transform.structured.tile %0 [4] : (!pdl.operation) -> (!pdl.operation, !pdl.operation) + ^bb0(%arg1: !transform.any_op): + %0 = transform.structured.match ops{["linalg.conv_1d"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1, %loop = transform.structured.tile %0 [4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) } func.func @main() { diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir @@ -27,9 +27,9 @@ } transform.sequence failures(propagate) { - ^bb0(%arg1: !pdl.operation): - %0 = transform.structured.match ops{["linalg.conv_1d_nwc_wcf"]} in %arg1 : (!pdl.operation) -> !pdl.operation - %1, %loops:2 = transform.structured.tile %0 [2, 4] : (!pdl.operation) -> (!pdl.operation, !pdl.operation, !pdl.operation) + ^bb0(%arg1: 
!transform.any_op): + %0 = transform.structured.match ops{["linalg.conv_1d_nwc_wcf"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1, %loops:2 = transform.structured.tile %0 [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) } func.func @main() { diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir @@ -25,9 +25,9 @@ } transform.sequence failures(propagate) { - ^bb0(%arg1: !pdl.operation): - %0 = transform.structured.match ops{["linalg.conv_2d"]} in %arg1 : (!pdl.operation) -> !pdl.operation - %1, %loops:2 = transform.structured.tile %0 [2, 2] : (!pdl.operation) -> (!pdl.operation, !pdl.operation, !pdl.operation) + ^bb0(%arg1: !transform.any_op): + %0 = transform.structured.match ops{["linalg.conv_2d"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1, %loops:2 = transform.structured.tile %0 [2, 2] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) } func.func @main() { diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir @@ -27,9 +27,9 @@ } transform.sequence failures(propagate) { - ^bb0(%arg1: !pdl.operation): - %0 = transform.structured.match ops{["linalg.conv_2d_nhwc_hwcf"]} in %arg1 : (!pdl.operation) -> !pdl.operation - %1, %loops:4 = transform.structured.tile %0 [2, 3, 3, 2] : (!pdl.operation) -> (!pdl.operation, !pdl.operation, !pdl.operation, !pdl.operation, !pdl.operation) + ^bb0(%arg1: !transform.any_op): + %0 = transform.structured.match ops{["linalg.conv_2d_nhwc_hwcf"]} in 
%arg1 : (!transform.any_op) -> !transform.any_op + %1, %loops:4 = transform.structured.tile %0 [2, 3, 3, 2] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op) } func.func @main() { diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir @@ -25,9 +25,9 @@ } transform.sequence failures(propagate) { - ^bb0(%arg1: !pdl.operation): - %0 = transform.structured.match ops{["linalg.conv_3d"]} in %arg1 : (!pdl.operation) -> !pdl.operation - %1, %loops:3 = transform.structured.tile %0 [2, 2, 2] : (!pdl.operation) -> (!pdl.operation, !pdl.operation, !pdl.operation, !pdl.operation) + ^bb0(%arg1: !transform.any_op): + %0 = transform.structured.match ops{["linalg.conv_3d"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1, %loops:3 = transform.structured.tile %0 [2, 2, 2] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op) } func.func @main() { diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir @@ -27,9 +27,9 @@ } transform.sequence failures(propagate) { - ^bb0(%arg1: !pdl.operation): - %0 = transform.structured.match ops{["linalg.conv_3d_ndhwc_dhwcf"]} in %arg1 : (!pdl.operation) -> !pdl.operation - %1, %loops:3 = transform.structured.tile %0 [0, 5, 5, 5] : (!pdl.operation) -> (!pdl.operation, !pdl.operation, !pdl.operation, !pdl.operation) + ^bb0(%arg1: !transform.any_op): + %0 = transform.structured.match ops{["linalg.conv_3d_ndhwc_dhwcf"]} in %arg1 : 
(!transform.any_op) -> !transform.any_op + %1, %loops:3 = transform.structured.tile %0 [0, 5, 5, 5] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op) } func.func @main() { diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir @@ -37,9 +37,9 @@ } transform.sequence failures(propagate) { - ^bb0(%arg1: !pdl.operation): - %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!pdl.operation) -> !pdl.operation - %1, %loops:3 = transform.structured.tile %0 [1, 2, 3] : (!pdl.operation) -> (!pdl.operation, !pdl.operation, !pdl.operation, !pdl.operation) + ^bb0(%arg1: !transform.any_op): + %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1, %loops:3 = transform.structured.tile %0 [1, 2, 3] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op) } func.func private @printMemrefF32(%ptr : tensor<*xf32>) diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -2340,6 +2340,29 @@ // ----- +llvm.func @par_task_(%arg0: !llvm.ptr {fir.bindc_name = "a"}) { + %0 = llvm.mlir.constant(1 : i32) : i32 + omp.task { + omp.parallel { + llvm.store %0, %arg0 : !llvm.ptr + omp.terminator + } + omp.terminator + } + llvm.return +} + +// CHECK-LABEL: @par_task_ +// CHECK: %[[TASK_ALLOC:.*]] = call ptr @__kmpc_omp_task_alloc({{.*}}ptr @par_task_..omp_par.wrapper) +// CHECK: call i32 @__kmpc_omp_task({{.*}}, ptr %[[TASK_ALLOC]]) +// CHECK-LABEL: define internal void @par_task_..omp_par +// CHECK: %[[ARG_ALLOC:.*]] = alloca { ptr }, align 8 +// CHECK: call void 
({{.*}}) @__kmpc_fork_call({{.*}}, ptr @par_task_..omp_par..omp_par, ptr %[[ARG_ALLOC]]) +// CHECK: define internal void @par_task_..omp_par..omp_par +// CHECK: define i32 @par_task_..omp_par.wrapper +// CHECK: call void @par_task_..omp_par +// ----- + llvm.func @foo() -> () llvm.func @omp_taskgroup(%x: i32, %y: i32, %zaddr: !llvm.ptr) { diff --git a/mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.td b/mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.td --- a/mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.td +++ b/mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.td @@ -44,9 +44,10 @@ : Op, DeclareOpInterfaceMethods]> { - let arguments = (ins Optional:$operand); - let results = (outs PDL_Operation:$res); - let assemblyFormat = "($operand^)? attr-dict"; + let arguments = (ins Optional:$operand); + let results = (outs TransformHandleTypeInterface:$res); + let assemblyFormat = + "($operand^)? attr-dict `:` functional-type($operand, $res)"; let cppNamespace = "::mlir::test"; } @@ -100,8 +101,10 @@ DeclareOpInterfaceMethods]> { let arguments = (ins Transform_AnyHandleOrParamType:$operand, - Optional:$second_operand); - let assemblyFormat = "$operand (`,` $second_operand^)? attr-dict `:` type($operand)"; + Optional:$second_operand); + let assemblyFormat = + "$operand (`,` $second_operand^)? 
attr-dict `:` type($operand)" + "(`,` type($second_operand)^)?"; let cppNamespace = "::mlir::test"; } @@ -110,9 +113,10 @@ [DeclareOpInterfaceMethods, DeclareOpInterfaceMethods]> { let arguments = (ins - PDL_Operation:$operand, + TransformHandleTypeInterface:$operand, StrAttr:$op_kind); - let assemblyFormat = "$operand `,` $op_kind attr-dict"; + let assemblyFormat = + "$operand `,` $op_kind attr-dict `:` type($operand)"; let cppNamespace = "::mlir::test"; } @@ -166,8 +170,8 @@ : Op, DeclareOpInterfaceMethods]> { - let arguments = (ins PDL_Operation:$operand); - let assemblyFormat = "$operand attr-dict"; + let arguments = (ins TransformHandleTypeInterface:$operand); + let assemblyFormat = "$operand attr-dict `:` type($operand)"; let cppNamespace = "::mlir::test"; } @@ -175,9 +179,9 @@ : Op, DeclareOpInterfaceMethods]> { - let arguments = (ins PDL_Operation:$operand); + let arguments = (ins TransformHandleTypeInterface:$operand); let results = (outs Optional:$out); - let assemblyFormat = "$operand attr-dict (`:` type($out)^)?"; + let assemblyFormat = "$operand attr-dict `:` functional-type($operand, $out)"; let cppNamespace = "::mlir::test"; } @@ -221,9 +225,9 @@ [DeclareOpInterfaceMethods, DeclareOpInterfaceMethods, FunctionalStyleTransformOpTrait]> { - let arguments = (ins PDL_Operation:$target, StrAttr:$remark, + let arguments = (ins TransformHandleTypeInterface:$target, StrAttr:$remark, UnitAttr:$fail_after_erase); - let assemblyFormat = "$target `,` $remark attr-dict"; + let assemblyFormat = "$target `,` $remark attr-dict `:` type($target)"; let cppNamespace = "::mlir::test"; } @@ -231,11 +235,12 @@ : Op { - let arguments = (ins PDL_Operation:$target); - let results = (outs PDL_Operation:$a, - PDL_Operation:$b, - PDL_Operation:$c); - let assemblyFormat = "$target attr-dict"; + let arguments = (ins TransformHandleTypeInterface:$target); + let results = (outs TransformHandleTypeInterface:$a, + TransformHandleTypeInterface:$b, + TransformHandleTypeInterface:$c); + 
let assemblyFormat = + "$target attr-dict `:` functional-type(operands, results)"; let cppNamespace = "::mlir::test"; let extraClassDeclaration = [{ ::mlir::DiagnosedSilenceableFailure applyToOne( @@ -249,9 +254,10 @@ : Op { - let arguments = (ins PDL_Operation:$target); - let results = (outs PDL_Operation:$result); - let assemblyFormat = "$target attr-dict"; + let arguments = (ins TransformHandleTypeInterface:$target); + let results = (outs TransformHandleTypeInterface:$result); + let assemblyFormat = + "$target attr-dict `:` functional-type($target, $result)"; let cppNamespace = "::mlir::test"; let extraClassDeclaration = [{ ::mlir::DiagnosedSilenceableFailure applyToOne( @@ -265,10 +271,11 @@ : Op { - let arguments = (ins PDL_Operation:$target); - let results = (outs PDL_Operation:$result1, - PDL_Operation:$result2); - let assemblyFormat = "$target attr-dict"; + let arguments = (ins TransformHandleTypeInterface:$target); + let results = (outs TransformHandleTypeInterface:$result1, + TransformHandleTypeInterface:$result2); + let assemblyFormat = + "$target attr-dict `:` functional-type(operands, results)"; let cppNamespace = "::mlir::test"; let extraClassDeclaration = [{ ::mlir::DiagnosedSilenceableFailure applyToOne( @@ -282,10 +289,11 @@ : Op { - let arguments = (ins PDL_Operation:$target); - let results = (outs PDL_Operation:$null, - PDL_Operation:$non_null); - let assemblyFormat = "$target attr-dict"; + let arguments = (ins TransformHandleTypeInterface:$target); + let results = (outs TransformHandleTypeInterface:$null, + TransformHandleTypeInterface:$non_null); + let assemblyFormat = + "$target attr-dict `:` functional-type(operands, results)"; let cppNamespace = "::mlir::test"; let extraClassDeclaration = [{ ::mlir::DiagnosedSilenceableFailure applyToOne( @@ -296,12 +304,12 @@ } def TestMixedSuccessAndSilenceableOp - : Op { - let arguments = (ins PDL_Operation:$target); + let arguments = (ins TransformHandleTypeInterface:$target); let results = (outs); - let 
assemblyFormat = "$target attr-dict"; + let assemblyFormat = "$target attr-dict `:` type($target)"; let cppNamespace = "::mlir::test"; let extraClassDeclaration = [{ ::mlir::DiagnosedSilenceableFailure applyToOne( @@ -324,18 +332,19 @@ : Op, DeclareOpInterfaceMethods]> { - let arguments = (ins PDL_Operation:$handle); - let results = (outs PDL_Operation:$copy); + let arguments = (ins TransformHandleTypeInterface:$handle); + let results = (outs TransformHandleTypeInterface:$copy); let cppNamespace = "::mlir::test"; - let assemblyFormat = "$handle attr-dict"; + let assemblyFormat = + "$handle attr-dict `:` functional-type(operands, results)"; } def TestReportNumberOfTrackedHandlesNestedUnder : Op, DeclareOpInterfaceMethods]> { - let arguments = (ins PDL_Operation:$target); - let assemblyFormat = "$target attr-dict"; + let arguments = (ins TransformHandleTypeInterface:$target); + let assemblyFormat = "$target attr-dict `:` type($target)"; let cppNamespace = "::mlir::test"; } diff --git a/mlir/test/lib/IR/CMakeLists.txt b/mlir/test/lib/IR/CMakeLists.txt --- a/mlir/test/lib/IR/CMakeLists.txt +++ b/mlir/test/lib/IR/CMakeLists.txt @@ -18,6 +18,7 @@ TestSymbolUses.cpp TestRegions.cpp TestTypes.cpp + TestUseListOrders.cpp TestVisitors.cpp TestVisitorsGeneric.cpp diff --git a/mlir/test/lib/IR/TestUseListOrders.cpp b/mlir/test/lib/IR/TestUseListOrders.cpp new file mode 100644 --- /dev/null +++ b/mlir/test/lib/IR/TestUseListOrders.cpp @@ -0,0 +1,227 @@ +//===- TestUseListOrders.cpp - Passes to test use-list orders -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Bytecode/BytecodeWriter.h" +#include "mlir/Bytecode/Encoding.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/OwningOpRef.h" +#include "mlir/Parser/Parser.h" +#include "mlir/Pass/Pass.h" + +#include +#include + +using namespace mlir; + +namespace { +/// This pass tests that: +/// 1) we can shuffle use-lists correctly; +/// 2) use-list orders are preserved after a roundtrip to bytecode. +class TestPreserveUseListOrders + : public PassWrapper> { +public: + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestPreserveUseListOrders) + + TestPreserveUseListOrders() = default; + TestPreserveUseListOrders(const TestPreserveUseListOrders &pass) + : PassWrapper(pass) {} + StringRef getArgument() const final { return "test-verify-uselistorder"; } + StringRef getDescription() const final { + return "Verify that roundtripping the IR to bytecode preserves the order " + "of the uselists"; + } + Option rngSeed{*this, "rng-seed", + llvm::cl::desc("Specify an input random seed"), + llvm::cl::init(1)}; + + LogicalResult initialize(MLIRContext *context) override { + rng.seed(static_cast(rngSeed)); + return success(); + } + + void runOnOperation() override { + // Clone the module so that we can plug in this pass to any other + // independently. + OwningOpRef cloneModule = getOperation().clone(); + + // 1. Compute the op numbering of the module. + computeOpNumbering(*cloneModule); + + // 2. Loop over all the values and shuffle the uses. While doing so, check + // that each shuffle is correct. + if (failed(shuffleUses(*cloneModule))) + return signalPassFailure(); + + // 3. Do a bytecode roundtrip to version 3, which supports use-list order + // preservation. 
+ auto roundtripModuleOr = doRoundtripToBytecode(*cloneModule, 3); + // If the bytecode roundtrip failed, try to roundtrip the original module + // to version 2, which does not support use-list. If this also fails, the + // original module had an issue unrelated to uselists. + if (failed(roundtripModuleOr)) { + auto testModuleOr = doRoundtripToBytecode(getOperation(), 2); + if (failed(testModuleOr)) + return; + + return signalPassFailure(); + } + + // 4. Recompute the op numbering on the new module. The numbering should be + // the same as (1), but on the new operation pointers. + computeOpNumbering(roundtripModuleOr->get()); + + // 5. Loop over all the values and verify that the use-list is consistent + // with the post-shuffle order of step (2). + if (failed(verifyUseListOrders(roundtripModuleOr->get()))) + return signalPassFailure(); + } + +private: + FailureOr> doRoundtripToBytecode(Operation *module, + uint32_t version) { + std::string str; + llvm::raw_string_ostream m(str); + BytecodeWriterConfig config; + config.setDesiredBytecodeVersion(version); + if (failed(writeBytecodeToFile(module, m, config))) + return failure(); + + ParserConfig parseConfig(&getContext(), /*verifyAfterParse=*/true); + auto newModuleOp = parseSourceString(StringRef(str), parseConfig); + if (!newModuleOp.get()) + return failure(); + return newModuleOp; + } + + /// Compute an ordered numbering for all the operations in the IR. + void computeOpNumbering(Operation *topLevelOp) { + uint32_t operationID = 0; + opNumbering.clear(); + topLevelOp->walk( + [&](Operation *op) { opNumbering.try_emplace(op, operationID++); }); + } + + template + SmallVector getUseIDs(ValueT val) { + return SmallVector(llvm::map_range(val.getUses(), [&](auto &use) { + return bytecode::getUseID(use, opNumbering.at(use.getOwner())); + })); + } + + LogicalResult shuffleUses(Operation *topLevelOp) { + uint32_t valueID = 0; + /// Permute randomly the use-list of each value. 
It is guaranteed that at + /// least one pair of the use list is permuted. + auto doShuffleForRange = [&](ValueRange range) -> LogicalResult { + for (auto val : range) { + if (val.use_empty() || val.hasOneUse()) + continue; + + /// Get a valid index permutation for the uses of value. + SmallVector permutation = getRandomPermutation(val); + + /// Store original order and verify that the shuffle was applied + /// correctly. + auto useIDs = getUseIDs(val); + + /// Apply shuffle to the uselist. + val.shuffleUseList(permutation); + + /// Get the new order and verify the shuffle happened correctly. + auto permutedIDs = getUseIDs(val); + if (permutedIDs.size() != useIDs.size()) + return failure(); + for (size_t idx = 0; idx < permutation.size(); idx++) + if (useIDs[idx] != permutedIDs[permutation[idx]]) + return failure(); + + referenceUseListOrder.try_emplace( + valueID++, llvm::map_range(val.getUses(), [&](auto &use) { + return bytecode::getUseID(use, opNumbering.at(use.getOwner())); + })); + } + return success(); + }; + + return walkOverValues(topLevelOp, doShuffleForRange); + } + + LogicalResult verifyUseListOrders(Operation *topLevelOp) { + uint32_t valueID = 0; + /// Check that the use-list for the value range matches the one stored in + /// the reference. 
+ auto doValidationForRange = [&](ValueRange range) -> LogicalResult { + for (auto val : range) { + if (val.use_empty() || val.hasOneUse()) + continue; + auto referenceOrder = referenceUseListOrder.at(valueID++); + for (auto [use, referenceID] : + llvm::zip(val.getUses(), referenceOrder)) { + uint64_t uniqueID = + bytecode::getUseID(use, opNumbering.at(use.getOwner())); + if (uniqueID != referenceID) { + use.getOwner()->emitError() + << "found use-list order mismatch for value: " << val; + return failure(); + } + } + } + return success(); + }; + + return walkOverValues(topLevelOp, doValidationForRange); + } + + /// Walk over blocks and operations and execute a callable over the ranges of + /// operands/results respectively. + template + LogicalResult walkOverValues(Operation *topLevelOp, FuncT callable) { + auto blockWalk = topLevelOp->walk([&](Block *block) { + if (failed(callable(block->getArguments()))) + return WalkResult::interrupt(); + return WalkResult::advance(); + }); + + if (blockWalk.wasInterrupted()) + return failure(); + + auto resultsWalk = topLevelOp->walk([&](Operation *op) { + if (failed(callable(op->getResults()))) + return WalkResult::interrupt(); + return WalkResult::advance(); + }); + + return failure(resultsWalk.wasInterrupted()); + } + + /// Creates a random permutation of the uselist order chain of the provided + /// value. + SmallVector getRandomPermutation(Value value) { + size_t numUses = std::distance(value.use_begin(), value.use_end()); + SmallVector permutation(numUses); + unsigned zero = 0; + std::iota(permutation.begin(), permutation.end(), zero); + std::shuffle(permutation.begin(), permutation.end(), rng); + return permutation; + } + + /// Map each value to its use-list order encoded with unique use IDs. + DenseMap> referenceUseListOrder; + + /// Map each operation to its global ID. 
+ DenseMap opNumbering; + + std::default_random_engine rng; +}; +} // namespace + +namespace mlir { +void registerTestPreserveUseListOrders() { + PassRegistration(); +} +} // namespace mlir diff --git a/mlir/test/python/dialects/python_test.py b/mlir/test/python/dialects/python_test.py --- a/mlir/test/python/dialects/python_test.py +++ b/mlir/test/python/dialects/python_test.py @@ -131,6 +131,27 @@ del op.unit print(f"Unit: {op.unit}") +# CHECK-LABEL: TEST: attrBuilder +@run +def attrBuilder(): + with Context() as ctx, Location.unknown(): + ctx.allow_unregistered_dialects = True + op = test.AttributesOp(x_bool=True, + x_i16=1, + x_i32=2, + x_i64=3, + x_si16=-1, + x_si32=-2, + x_f32=1.5, + x_f64=2.5, + x_str='x_str', + x_i32_array=[1, 2, 3], + x_i64_array=[4, 5, 6], + x_f32_array=[1.5, -2.5, 3.5], + x_f64_array=[4.5, 5.5, -6.5], + x_i64_dense=[1, 2, 3, 4, 5, 6]) + print(op) + # CHECK-LABEL: TEST: inferReturnTypes @run diff --git a/mlir/test/python/dialects/transform.py b/mlir/test/python/dialects/transform.py --- a/mlir/test/python/dialects/transform.py +++ b/mlir/test/python/dialects/transform.py @@ -32,38 +32,38 @@ @run def testSequenceOp(): sequence = transform.SequenceOp(transform.FailurePropagationMode.PROPAGATE, - [pdl.OperationType.get()], - pdl.OperationType.get()) + [transform.AnyOpType.get()], + transform.AnyOpType.get()) with InsertionPoint(sequence.body): transform.YieldOp([sequence.bodyTarget]) # CHECK-LABEL: TEST: testSequenceOp - # CHECK: = transform.sequence -> !pdl.operation failures(propagate) { - # CHECK: ^{{.*}}(%[[ARG0:.+]]: !pdl.operation): - # CHECK: yield %[[ARG0]] : !pdl.operation + # CHECK: = transform.sequence -> !transform.any_op failures(propagate) { + # CHECK: ^{{.*}}(%[[ARG0:.+]]: !transform.any_op): + # CHECK: yield %[[ARG0]] : !transform.any_op # CHECK: } @run def testNestedSequenceOp(): - sequence = transform.SequenceOp(transform.FailurePropagationMode.PROPAGATE, [], pdl.OperationType.get()) + sequence = 
transform.SequenceOp(transform.FailurePropagationMode.PROPAGATE, [], transform.AnyOpType.get()) with InsertionPoint(sequence.body): nested = transform.SequenceOp(transform.FailurePropagationMode.PROPAGATE, [], sequence.bodyTarget) with InsertionPoint(nested.body): doubly_nested = transform.SequenceOp( transform.FailurePropagationMode.PROPAGATE, - [pdl.OperationType.get()], nested.bodyTarget) + [transform.AnyOpType.get()], nested.bodyTarget) with InsertionPoint(doubly_nested.body): transform.YieldOp([doubly_nested.bodyTarget]) transform.YieldOp() transform.YieldOp() # CHECK-LABEL: TEST: testNestedSequenceOp # CHECK: transform.sequence failures(propagate) { - # CHECK: ^{{.*}}(%[[ARG0:.+]]: !pdl.operation): - # CHECK: sequence %[[ARG0]] : !pdl.operation failures(propagate) { - # CHECK: ^{{.*}}(%[[ARG1:.+]]: !pdl.operation): - # CHECK: = sequence %[[ARG1]] : !pdl.operation -> !pdl.operation failures(propagate) { - # CHECK: ^{{.*}}(%[[ARG2:.+]]: !pdl.operation): - # CHECK: yield %[[ARG2]] : !pdl.operation + # CHECK: ^{{.*}}(%[[ARG0:.+]]: !transform.any_op): + # CHECK: sequence %[[ARG0]] : !transform.any_op failures(propagate) { + # CHECK: ^{{.*}}(%[[ARG1:.+]]: !transform.any_op): + # CHECK: = sequence %[[ARG1]] : !transform.any_op -> !transform.any_op failures(propagate) { + # CHECK: ^{{.*}}(%[[ARG2:.+]]: !transform.any_op): + # CHECK: yield %[[ARG2]] : !transform.any_op # CHECK: } # CHECK: } # CHECK: } @@ -103,58 +103,58 @@ @run def testTransformPDLOps(): - withPdl = transform.WithPDLPatternsOp(pdl.OperationType.get()) + withPdl = transform.WithPDLPatternsOp(transform.AnyOpType.get()) with InsertionPoint(withPdl.body): sequence = transform.SequenceOp(transform.FailurePropagationMode.PROPAGATE, - [pdl.OperationType.get()], + [transform.AnyOpType.get()], withPdl.bodyTarget) with InsertionPoint(sequence.body): - match = transform.PDLMatchOp(pdl.OperationType.get(), sequence.bodyTarget, "pdl_matcher") + match = transform.PDLMatchOp(transform.AnyOpType.get(), 
sequence.bodyTarget, "pdl_matcher") transform.YieldOp(match) # CHECK-LABEL: TEST: testTransformPDLOps # CHECK: transform.with_pdl_patterns { - # CHECK: ^{{.*}}(%[[ARG0:.+]]: !pdl.operation): - # CHECK: = sequence %[[ARG0]] : !pdl.operation -> !pdl.operation failures(propagate) { - # CHECK: ^{{.*}}(%[[ARG1:.+]]: !pdl.operation): + # CHECK: ^{{.*}}(%[[ARG0:.+]]: !transform.any_op): + # CHECK: = sequence %[[ARG0]] : !transform.any_op -> !transform.any_op failures(propagate) { + # CHECK: ^{{.*}}(%[[ARG1:.+]]: !transform.any_op): # CHECK: %[[RES:.+]] = pdl_match @pdl_matcher in %[[ARG1]] - # CHECK: yield %[[RES]] : !pdl.operation + # CHECK: yield %[[RES]] : !transform.any_op # CHECK: } # CHECK: } @run def testGetClosestIsolatedParentOp(): - sequence = transform.SequenceOp(transform.FailurePropagationMode.PROPAGATE, [], pdl.OperationType.get()) + sequence = transform.SequenceOp(transform.FailurePropagationMode.PROPAGATE, [], transform.AnyOpType.get()) with InsertionPoint(sequence.body): - transform.GetClosestIsolatedParentOp(pdl.OperationType.get(), sequence.bodyTarget) + transform.GetClosestIsolatedParentOp(transform.AnyOpType.get(), sequence.bodyTarget) transform.YieldOp() # CHECK-LABEL: TEST: testGetClosestIsolatedParentOp # CHECK: transform.sequence - # CHECK: ^{{.*}}(%[[ARG1:.+]]: !pdl.operation): + # CHECK: ^{{.*}}(%[[ARG1:.+]]: !transform.any_op): # CHECK: = get_closest_isolated_parent %[[ARG1]] @run def testMergeHandlesOp(): - sequence = transform.SequenceOp(transform.FailurePropagationMode.PROPAGATE, [], pdl.OperationType.get()) + sequence = transform.SequenceOp(transform.FailurePropagationMode.PROPAGATE, [], transform.AnyOpType.get()) with InsertionPoint(sequence.body): transform.MergeHandlesOp([sequence.bodyTarget]) transform.YieldOp() # CHECK-LABEL: TEST: testMergeHandlesOp # CHECK: transform.sequence - # CHECK: ^{{.*}}(%[[ARG1:.+]]: !pdl.operation): + # CHECK: ^{{.*}}(%[[ARG1:.+]]: !transform.any_op): # CHECK: = merge_handles %[[ARG1]] @run def 
testReplicateOp(): - with_pdl = transform.WithPDLPatternsOp(pdl.OperationType.get()) + with_pdl = transform.WithPDLPatternsOp(transform.AnyOpType.get()) with InsertionPoint(with_pdl.body): sequence = transform.SequenceOp( transform.FailurePropagationMode.PROPAGATE, [], with_pdl.bodyTarget) with InsertionPoint(sequence.body): - m1 = transform.PDLMatchOp(pdl.OperationType.get(), sequence.bodyTarget, "first") - m2 = transform.PDLMatchOp(pdl.OperationType.get(), sequence.bodyTarget, "second") + m1 = transform.PDLMatchOp(transform.AnyOpType.get(), sequence.bodyTarget, "first") + m2 = transform.PDLMatchOp(transform.AnyOpType.get(), sequence.bodyTarget, "second") transform.ReplicateOp(m1, [m2]) transform.YieldOp() # CHECK-LABEL: TEST: testReplicateOp diff --git a/mlir/test/python/python_test_ops.td b/mlir/test/python/python_test_ops.td --- a/mlir/test/python/python_test_ops.td +++ b/mlir/test/python/python_test_ops.td @@ -57,6 +57,23 @@ UnitAttr:$unit); } +def AttributesOp : TestOp<"attributes_op"> { + let arguments = (ins BoolAttr:$x_bool, + I16Attr: $x_i16, + I32Attr: $x_i32, + I64Attr: $x_i64, + SI16Attr: $x_si16, + SI32Attr: $x_si32, + F32Attr: $x_f32, + F64Attr: $x_f64, + StrAttr: $x_str, + I32ArrayAttr: $x_i32_array, + I64ArrayAttr: $x_i64_array, + F32ArrayAttr: $x_f32_array, + F64ArrayAttr: $x_f64_array, + DenseI64ArrayAttr: $x_i64_dense); +} + def PropertyOp : TestOp<"property_op"> { let arguments = (ins I32Attr:$property, I32:$idx); diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -53,6 +53,7 @@ void registerTestPrintDefUsePass(); void registerTestPrintInvalidPass(); void registerTestPrintNestingPass(); +void registerTestPreserveUseListOrders(); void registerTestReducer(); void registerTestSpirvEntryPointABIPass(); void registerTestSpirvModuleCombinerPass(); @@ -167,6 +168,7 @@ registerTestPrintDefUsePass(); registerTestPrintInvalidPass(); 
registerTestPrintNestingPass(); + registerTestPreserveUseListOrders(); registerTestReducer(); registerTestSpirvEntryPointABIPass(); registerTestSpirvModuleCombinerPass(); diff --git a/utils/bazel/WORKSPACE b/utils/bazel/WORKSPACE --- a/utils/bazel/WORKSPACE +++ b/utils/bazel/WORKSPACE @@ -27,13 +27,6 @@ llvm_configure(name = "llvm-project") -load("@llvm-raw//utils/bazel:terminfo.bzl", "llvm_terminfo_from_env") - -maybe( - llvm_terminfo_from_env, - name = "llvm_terminfo", -) - maybe( http_archive, name = "llvm_zlib", diff --git a/utils/bazel/configure.bzl b/utils/bazel/configure.bzl --- a/utils/bazel/configure.bzl +++ b/utils/bazel/configure.bzl @@ -5,7 +5,6 @@ """Helper macros to configure the LLVM overlay project.""" load("@bazel_tools//tools/build_defs/repo:utils.bzl", "maybe") -load(":terminfo.bzl", "llvm_terminfo_disable", "llvm_terminfo_system") # Directory of overlay files relative to WORKSPACE DEFAULT_OVERLAY_PATH = "llvm-project-overlay" @@ -173,15 +172,3 @@ "targets": attr.string_list(default = DEFAULT_TARGETS), }, ) - -def llvm_disable_optional_support_deps(): - maybe( - llvm_terminfo_disable, - name = "llvm_terminfo", - ) - -def llvm_use_system_support_deps(): - maybe( - llvm_terminfo_system, - name = "llvm_terminfo", - ) diff --git a/utils/bazel/deps_impl/BUILD.bazel b/utils/bazel/deps_impl/BUILD.bazel deleted file mode 100644 --- a/utils/bazel/deps_impl/BUILD.bazel +++ /dev/null @@ -1,5 +0,0 @@ -# This file is licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -# Required to reference files in this package diff --git a/utils/bazel/deps_impl/terminfo_disable.BUILD b/utils/bazel/deps_impl/terminfo_disable.BUILD deleted file mode 100644 --- a/utils/bazel/deps_impl/terminfo_disable.BUILD +++ /dev/null @@ -1,10 +0,0 @@ -# This file is licensed under the Apache License v2.0 with LLVM Exceptions. 
-# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -# Empty stub library. This doesn't include any terminfo library and doesn't set -# the LLVM `#define`s to enable usage of terminfo. -cc_library( - name = "terminfo", - visibility = ["//visibility:public"], -) diff --git a/utils/bazel/deps_impl/terminfo_system.BUILD b/utils/bazel/deps_impl/terminfo_system.BUILD deleted file mode 100644 --- a/utils/bazel/deps_impl/terminfo_system.BUILD +++ /dev/null @@ -1,15 +0,0 @@ -# This file is licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -# Wrapper library for some system terminfo. Using this only works if the -# toolchain already has the relevant library search paths configured. It also -# sets the relevant LLVM `#define`s to enoble using terminfo. -cc_library( - name = "terminfo", - defines = ["LLVM_ENABLE_TERMINFO=1"], - # Note that we will replace these link options with ones needed to - # effectively link against a terminfo providing library on the system. - linkopts = {TERMINFO_LINKOPTS}, - visibility = ["//visibility:public"], -) diff --git a/utils/bazel/deps_impl/terminfo_test.c b/utils/bazel/deps_impl/terminfo_test.c deleted file mode 100644 --- a/utils/bazel/deps_impl/terminfo_test.c +++ /dev/null @@ -1,17 +0,0 @@ -/* -This file is licensed under the Apache License v2.0 with LLVM Exceptions. -See https://llvm.org/LICENSE.txt for license information. 
-SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -*/ - -extern int setupterm(char *term, int filedes, int *errret); -extern struct term *set_curterm(struct term *termp); -extern int del_curterm(struct term *termp); -extern int tigetnum(char *capname); - -int main() { - setupterm(0, 0, 0); - set_curterm(0); - del_curterm(0); - tigetnum(0); -} diff --git a/utils/bazel/examples/http_archive/WORKSPACE b/utils/bazel/examples/http_archive/WORKSPACE --- a/utils/bazel/examples/http_archive/WORKSPACE +++ b/utils/bazel/examples/http_archive/WORKSPACE @@ -38,8 +38,3 @@ load("@llvm-raw//utils/bazel:configure.bzl", "llvm_configure", "llvm_disable_optional_support_deps") llvm_configure(name = "llvm-project") - -# Disables optional dependencies for Support like zlib and terminfo. You may -# instead want to configure them using the macros in the corresponding bzl -# files. -llvm_disable_optional_support_deps() diff --git a/utils/bazel/examples/submodule/WORKSPACE b/utils/bazel/examples/submodule/WORKSPACE --- a/utils/bazel/examples/submodule/WORKSPACE +++ b/utils/bazel/examples/submodule/WORKSPACE @@ -27,8 +27,3 @@ load("@llvm-raw//utils/bazel:configure.bzl", "llvm_configure", "llvm_disable_optional_support_deps") llvm_configure(name = "llvm-project") - -# Disables optional dependencies for Support like zlib and terminfo. You may -# instead want to configure them using the macros in the corresponding bzl -# files. -llvm_disable_optional_support_deps() diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -274,11 +274,6 @@ deps = [ ":config", ":Demangle", - # We unconditionally depend on the custom LLVM terminfo wrapper. This - # will be an empty library unless terminfo is enabled, in which case it - # will both provide the necessary dependencies and configuration - # defines. 
- "@llvm_terminfo//:terminfo", # We unconditionally depend on the custom LLVM zlib wrapper. This will # be an empty library unless zlib is enabled, in which case it will # both provide the necessary dependencies and configuration defines. diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -6164,6 +6164,14 @@ ["-gen-op-interface-defs"], "include/mlir/Interfaces/MemorySlotOpInterfaces.cpp.inc", ), + ( + ["-gen-type-interface-decls"], + "include/mlir/Interfaces/MemorySlotTypeInterfaces.h.inc", + ), + ( + ["-gen-type-interface-defs"], + "include/mlir/Interfaces/MemorySlotTypeInterfaces.cpp.inc", + ), ], tblgen = ":mlir-tblgen", td_file = "include/mlir/Interfaces/MemorySlotInterfaces.td", @@ -8274,6 +8282,7 @@ ], deps = [ ":ControlFlowInterfaces", + ":FuncDialect", ":IR", ":LLVMDialect", ":OpenMPInterfacesIncGen", @@ -10524,11 +10533,11 @@ ], includes = ["include"], deps = [ - ":MemorySlotInterfacesTdFiles", ":ArithOpsTdFiles", ":CastInterfacesTdFiles", ":ControlFlowInterfacesTdFiles", ":CopyOpInterfaceTdFiles", + ":MemorySlotInterfacesTdFiles", ":OpBaseTdFiles", ":ShapedOpInterfacesTdFiles", ":SideEffectInterfacesTdFiles", @@ -10604,9 +10613,9 @@ ":DialectUtils", ":IR", ":InferTypeOpInterface", - ":MemorySlotInterfaces", ":MemRefBaseIncGen", ":MemRefOpsIncGen", + ":MemorySlotInterfaces", ":ShapedOpInterfaces", ":ValueBoundsOpInterface", ":ViewLikeInterface", diff --git a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel --- a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel @@ -386,9 +386,12 @@ "//llvm:Support", "//mlir:Analysis", "//mlir:ArithDialect", + "//mlir:BytecodeReader", + "//mlir:BytecodeWriter", "//mlir:FuncDialect", "//mlir:IR", "//mlir:LinalgDialect", + 
"//mlir:Parser", "//mlir:Pass", "//mlir:Support", ], diff --git a/utils/bazel/terminfo.bzl b/utils/bazel/terminfo.bzl deleted file mode 100644 --- a/utils/bazel/terminfo.bzl +++ /dev/null @@ -1,203 +0,0 @@ -# This file is licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -"""Repository rules to configure the terminfo used by LLVM. - -Most users should pick one of the explicit rules to configure their use of terminfo -with LLVM: -- `llvm_terminfo_system` will detect and link against a terminfo-implementing - system library (non-hermetically). -- 'llvm_terminfo_disable` will disable terminfo completely. - -If you would like to make your build configurable, you can use -`llvm_terminfo_from_env`. By default, this will disable terminfo, but will -inspect the environment variable (most easily set with a `--repo_env` flag to -the Bazel invocation) `BAZEL_LLVM_TERMINFO_STRATEGY`. If it is set to -`system` then it will behave the same as `llvm_terminfo_system`. Any other -setting will disable terminfo the same as not setting it at all. -""" - -def _llvm_terminfo_disable_impl(repository_ctx): - repository_ctx.template( - "BUILD", - repository_ctx.attr._disable_build_template, - executable = False, - ) - -_terminfo_disable_attrs = { - "_disable_build_template": attr.label( - default = "@llvm-raw//utils/bazel/deps_impl:terminfo_disable.BUILD", - allow_single_file = True, - ), -} - -llvm_terminfo_disable = repository_rule( - implementation = _llvm_terminfo_disable_impl, - attrs = _terminfo_disable_attrs, -) - -def _find_c_compiler(repository_ctx): - """Returns the path to a plausible C compiler. - - This routine will only reliably work on roughly POSIX-y systems as it - ultimately falls back on the `cc` binary. 
Fortunately, the thing we are - trying to use it for (detecting if a trivial source file can compile and - link against a particular library) requires very little. - """ - cc_env = repository_ctx.os.environ.get("CC") - cc = None - if cc_env: - if "/" in cc_env: - return repository_ctx.path(cc_env) - else: - return repository_ctx.which(cc_env) - - # Look for Clang, GCC, and the POSIX / UNIX specified C compiler - # binaries. - for compiler in ["clang", "gcc", "c99", "c89", "cc"]: - cc = repository_ctx.which(compiler) - if cc: - return cc - - return None - -def _try_link(repository_ctx, cc, source, linker_flags): - """Returns `True` if able to link the source with the linker flag. - - Given a source file that contains references to library routines, this - will check that when linked with the provided linker flag, those - references are successfully resolved. This routine assumes a generally - POSIX-y and GCC-ish compiler and environment and shouldn't be expected to - work outside of that. - """ - cmd = [ - cc, - # Force discard the linked executable. - "-o", - "/dev/null", - # Leave language detection to the compiler. - source, - ] - - # The linker flag must be valid for a compiler invocation of the link step, - # so just append them to the command. - cmd += linker_flags - exec_result = repository_ctx.execute(cmd, timeout = 20) - return exec_result.return_code == 0 - -def _llvm_terminfo_system_impl(repository_ctx): - # LLVM doesn't need terminfo support on Windows, so just disable it. - if repository_ctx.os.name.lower().find("windows") != -1: - _llvm_terminfo_disable_impl(repository_ctx) - return - - if len(repository_ctx.attr.system_linkopts) > 0: - linkopts = repository_ctx.attr.system_linkopts - else: - required = repository_ctx.attr.system_required - - # Find a C compiler we can use to detect viable linkopts on this system. 
- cc = _find_c_compiler(repository_ctx) - if not cc: - if required: - fail("Failed to find a C compiler executable") - else: - _llvm_terminfo_disable_impl(repository_ctx) - return - - # Get the source file we use to detect successful linking of terminfo. - source = repository_ctx.path(repository_ctx.attr._terminfo_test_source) - - # Collect the candidate linkopts and wrap them into a list. Ideally, - # these would be provided as lists, but Bazel doesn't currently - # support that. See: https://github.com/bazelbuild/bazel/issues/12178 - linkopts_candidates = [[x] for x in repository_ctx.attr.candidate_system_linkopts] - linkopts = None - - # For each candidate, try to use it to link our test source file. - for linkopts_candidate in linkopts_candidates: - if _try_link(repository_ctx, cc, source, linkopts_candidate): - linkopts = linkopts_candidate - break - - # If we never found a viable linkopts candidate, either error or disable - # terminfo for LLVM. - if not linkopts: - if required: - fail("Failed to detect which linkopt would successfully provide the " + - "necessary terminfo functionality") - else: - _llvm_terminfo_disable_impl(repository_ctx) - return - - repository_ctx.template( - "BUILD", - repository_ctx.attr._system_build_template, - substitutions = { - "{TERMINFO_LINKOPTS}": str(linkopts), - }, - executable = False, - ) - -def _merge_attrs(attrs_list): - attrs = {} - for input_attrs in attrs_list: - attrs.update(input_attrs) - return attrs - -_terminfo_system_attrs = _merge_attrs([_terminfo_disable_attrs, { - "_system_build_template": attr.label( - default = "@llvm-raw//utils/bazel/deps_impl:terminfo_system.BUILD", - allow_single_file = True, - ), - "_terminfo_test_source": attr.label( - default = "@llvm-raw//utils/bazel/deps_impl:terminfo_test.c", - allow_single_file = True, - ), - "candidate_system_linkopts": attr.string_list( - default = [ - "-lterminfo", - "-ltinfo", - "-lcurses", - "-lncurses", - "-lncursesw", - ], - doc = "Candidate linkopts to test 
and see if they can link " + - "successfully.", - ), - "system_required": attr.bool( - default = False, - doc = "Require that one of the candidates is detected successfully on POSIX platforms where it is needed.", - ), - "system_linkopts": attr.string_list( - default = [], - doc = "If non-empty, a specific array of linkopts to use to " + - "successfully link against the terminfo library. No " + - "detection is performed if this option is provided, it " + - "directly forces the use of these link options. No test is " + - "run to determine if they are valid or work correctly either.", - ), -}]) - -llvm_terminfo_system = repository_rule( - implementation = _llvm_terminfo_system_impl, - configure = True, - local = True, - attrs = _terminfo_system_attrs, -) - -def _llvm_terminfo_from_env_impl(repository_ctx): - terminfo_strategy = repository_ctx.os.environ.get("BAZEL_LLVM_TERMINFO_STRATEGY") - if terminfo_strategy == "system": - _llvm_terminfo_system_impl(repository_ctx) - else: - _llvm_terminfo_disable_impl(repository_ctx) - -llvm_terminfo_from_env = repository_rule( - implementation = _llvm_terminfo_from_env_impl, - configure = True, - local = True, - attrs = _merge_attrs([_terminfo_disable_attrs, _terminfo_system_attrs]), - environ = ["BAZEL_LLVM_TERMINFO_STRATEGY", "CC"], -)