diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -710,6 +710,12 @@
     return result;
   }

+  void resetFPOptions(FPOptions FPO) {
+    CurFPFeatures = FPO;
+    FpPragmaStack.Stack.clear();
+    FpPragmaStack.CurrentValue = FPO.getChangesFrom(FPOptions(LangOpts));
+  }
+
   // RAII object to push / pop sentinel slots for all MS #pragma stacks.
   // Actions should be performed only if we enter / exit a C++ method body.
   class PragmaStackSentinelRAII {
@@ -14001,6 +14007,8 @@
     CachedTokens Toks;
     /// The template function declaration to be late parsed.
     Decl *D;
+    /// Floating-point options at the point of definition.
+    FPOptions FPO;
   };

   template <>
diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -545,6 +545,9 @@
   if (C.getDriver().isUsingLTO())
     addLTOOptions(getToolChain(), Args, CmdArgs, Output, Inputs[0],
                   C.getDriver().getLTOMode() == LTOK_Thin);
+  else if (Args.hasArg(options::OPT_mcpu_EQ))
+    CmdArgs.push_back(Args.MakeArgString(
+        "-plugin-opt=mcpu=" + Args.getLastArgValue(options::OPT_mcpu_EQ)));
   CmdArgs.push_back("--no-undefined");
   CmdArgs.push_back("-shared");
   CmdArgs.push_back("-o");
diff --git a/clang/lib/Parse/ParseTemplate.cpp b/clang/lib/Parse/ParseTemplate.cpp
--- a/clang/lib/Parse/ParseTemplate.cpp
+++ b/clang/lib/Parse/ParseTemplate.cpp
@@ -1742,6 +1742,10 @@
     Actions.PushDeclContext(Actions.getCurScope(), DC);
   }

+  // Parsing should occur with an empty FP pragma stack and the FP options
+  // that were in effect at the point of the template definition.
+  Actions.resetFPOptions(LPT.FPO);
+
   assert(!LPT.Toks.empty() && "Empty body!");

   // Append the current token at the end of the new token stream so that it
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -11342,6 +11342,7 @@
   // Take tokens to avoid allocations
   LPT->Toks.swap(Toks);
   LPT->D = FnD;
+  LPT->FPO = getCurFPFeatures();
   LateParsedTemplateMap.insert(std::make_pair(FD, std::move(LPT)));

   FD->setLateTemplateParsed(true);
diff --git a/clang/test/CodeGen/fp-template.cpp b/clang/test/CodeGen/fp-template.cpp
--- a/clang/test/CodeGen/fp-template.cpp
+++ b/clang/test/CodeGen/fp-template.cpp
@@ -15,4 +15,40 @@
 // CHECK-SAME:  (float noundef %{{.*}}, float noundef %{{.*}}) #[[ATTR01:[0-9]+]]{{.*}} {
 // CHECK:       call float @llvm.experimental.constrained.fadd.f32

+
+template <typename Ty>
+Ty templ_02(Ty x, Ty y) {
+  return x + y;
+}
+
+#pragma STDC FENV_ROUND FE_UPWARD
+
+template <typename Ty>
+Ty templ_03(Ty x, Ty y) {
+  return x - y;
+}
+
+#pragma STDC FENV_ROUND FE_TONEAREST
+
+float func_02(float x, float y) {
+  return templ_02(x, y);
+}
+
+// CHECK-LABEL: define {{.*}} float @_Z8templ_02IfET_S0_S0_
+// CHECK:       %add = fadd float %0, %1
+
+float func_03(float x, float y) {
+  return templ_03(x, y);
+}
+
+// CHECK-LABEL: define {{.*}} float @_Z8templ_03IfET_S0_S0_
+// CHECK:       call float @llvm.experimental.constrained.fsub.f32({{.*}}, metadata !"round.upward", metadata !"fpexcept.ignore")
+
+
+// This pragma sets a non-default rounding mode before delayed parsing occurs.
+// It checks that parsing uses the FP options defined by command-line options
+// or by pragmas preceding the template definition, not by this pragma.
+#pragma STDC FENV_ROUND FE_TOWARDZERO
+
 // CHECK: attributes #[[ATTR01]] = { {{.*}}strictfp
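A minimal sketch of the user-visible behavior the clang changes above pin
down, assuming a strict FP mode and -fdelayed-template-parsing (hypothetical
example, not part of the patch):

    #pragma STDC FENV_ROUND FE_UPWARD
    template <typename T> T add(T a, T b) {
      return a + b; // late-parsed body: must use FE_UPWARD, captured in LPT.FPO
    }
    #pragma STDC FENV_ROUND FE_TOWARDZERO
    float caller(float x, float y) {
      return add(x, y); // still rounds upward; the later pragma does not leak in
    }

Without resetFPOptions, the delayed parse at end of TU would pick up whatever
pragma state happened to be current there (FE_TOWARDZERO above).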
diff --git a/clang/test/Driver/amdgpu-toolchain.c b/clang/test/Driver/amdgpu-toolchain.c
--- a/clang/test/Driver/amdgpu-toolchain.c
+++ b/clang/test/Driver/amdgpu-toolchain.c
@@ -11,6 +11,8 @@
 // DWARF_VER: "-dwarf-version=5"

 // RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
-// RUN:   -L. -flto -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=LTO %s
+// RUN:   -L. -flto -fconvergent-functions %s 2>&1 | FileCheck -check-prefixes=LTO,MCPU %s
+// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
+// RUN:   -L. -fconvergent-functions %s 2>&1 | FileCheck -check-prefix=MCPU %s
 // LTO: clang{{.*}} "-flto=full"{{.*}}"-fconvergent-functions"
-// LTO: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx906"
+// MCPU: ld.lld{{.*}}"-L."{{.*}}"-plugin-opt=mcpu=gfx906"
diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
--- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp
+++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
@@ -999,9 +999,7 @@
         auto stringLitOp = builder.createStringLitOp(loc, str);
         builder.create<fir::HasValueOp>(loc, stringLitOp);
       },
-      builder.createInternalLinkage());
-  // TODO: This can be changed to linkonce linkage once we have support for
-  // generating comdat sections
+      builder.createLinkOnceLinkage());
   auto addr = builder.create<fir::AddrOfOp>(loc, global.resultType(),
                                             global.getSymbol());
   auto len = builder.createIntegerConstant(
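The switch to linkonce linkage only links cleanly when the globals are also
placed in comdat sections, which is what the CodeGen change below adds. As a
rough sketch of the equivalent LLVM C++ API the MLIR lowering mirrors (helper
name invented for illustration):

    // Give a linkonce global an "any" comdat so the linker can
    // deduplicate the duplicate definitions across objects.
    void giveComdat(llvm::GlobalVariable &GV, llvm::Module &M) {
      llvm::Comdat *C = M.getOrInsertComdat(GV.getName());
      C->setSelectionKind(llvm::Comdat::Any);
      GV.setComdat(C);
    }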
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -2856,6 +2856,14 @@
     auto g = rewriter.create<mlir::LLVM::GlobalOp>(
         loc, tyAttr, isConst, linkage, global.getSymName(), initAttr);

+    auto module = global->getParentOfType<mlir::ModuleOp>();
+    // Add comdat if necessary
+    if (fir::getTargetTriple(module).supportsCOMDAT() &&
+        (linkage == mlir::LLVM::Linkage::Linkonce ||
+         linkage == mlir::LLVM::Linkage::LinkonceODR)) {
+      addComdat(g, rewriter, module);
+    }
+
     // Apply all non-Fir::GlobalOp attributes to the LLVM::GlobalOp, preserving
     // them; whilst taking care not to apply attributes that are lowered in
     // other ways.
@@ -2931,6 +2939,27 @@
     }
     return mlir::LLVM::Linkage::External;
   }
+
+private:
+  static void addComdat(mlir::LLVM::GlobalOp &global,
+                        mlir::ConversionPatternRewriter &rewriter,
+                        mlir::ModuleOp &module) {
+    const char *comdatName = "__llvm_comdat";
+    mlir::LLVM::ComdatOp comdatOp =
+        module.lookupSymbol<mlir::LLVM::ComdatOp>(comdatName);
+    if (!comdatOp) {
+      comdatOp =
+          rewriter.create<mlir::LLVM::ComdatOp>(module.getLoc(), comdatName);
+    }
+    mlir::OpBuilder::InsertionGuard guard(rewriter);
+    rewriter.setInsertionPointToEnd(&comdatOp.getBody().back());
+    auto selectorOp = rewriter.create<mlir::LLVM::ComdatSelectorOp>(
+        comdatOp.getLoc(), global.getSymName(),
+        mlir::LLVM::comdat::Comdat::Any);
+    global.setComdatAttr(mlir::SymbolRefAttr::get(
+        rewriter.getContext(), comdatName,
+        mlir::FlatSymbolRefAttr::get(selectorOp.getSymNameAttr())));
+  }
 };

 /// `fir.load` --> `llvm.load`
diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -968,7 +968,7 @@
     return mlir::dyn_cast<fir::CharacterType>(ty).getLen();
   else if (mlir::isa<mlir::VectorType>(ty)) {
     // fir.vector only supports 1-D vector
-    if (mlir::dyn_cast<mlir::VectorType>(ty).getNumScalableDims() == 0)
+    if (!(mlir::dyn_cast<mlir::VectorType>(ty).isScalable()))
       return mlir::dyn_cast<mlir::VectorType>(ty).getShape()[0];
   }

diff --git a/flang/test/Fir/convert-to-llvm.fir b/flang/test/Fir/convert-to-llvm.fir
--- a/flang/test/Fir/convert-to-llvm.fir
+++ b/flang/test/Fir/convert-to-llvm.fir
@@ -1,7 +1,9 @@
-// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" %s | FileCheck %s
-// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=aarch64-unknown-linux-gnu" %s | FileCheck %s
-// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=i386-unknown-linux-gnu" %s | FileCheck %s
-// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=powerpc64le-unknown-linux-gn" %s | FileCheck %s
+// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT
+// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=aarch64-unknown-linux-gnu" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT
+// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=i386-unknown-linux-gnu" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT
+// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=powerpc64le-unknown-linux-gn" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT
+// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=x86_64-pc-win32" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT
+// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=aarch64-apple-darwin" %s | FileCheck %s --check-prefixes=CHECK,CHECK-NO-COMDAT

//=============================================================================
// SUMMARY: Tests for FIR --> LLVM MLIR conversion independent of the target
@@ -49,7 +51,8 @@
// -----

fir.global linkonce @w_i86 (86:i32) : i32
-// CHECK: llvm.mlir.global linkonce @w_i86(86 : i32) {addr_space = 0 : i32} : i32
+// CHECK-COMDAT: llvm.mlir.global linkonce @w_i86(86 : i32) comdat(@__llvm_comdat::@w_i86) {addr_space = 0 : i32} : i32
+// CHECK-NO-COMDAT: llvm.mlir.global linkonce @w_i86(86 : i32) {addr_space = 0 : i32} : i32

// -----
@@ -1678,7 +1681,8 @@
   return
 }

-// CHECK: llvm.mlir.global linkonce constant @_QMtest_dinitE.dt.tseq() {addr_space = 0 : i32} : i8
+// CHECK-COMDAT: llvm.mlir.global linkonce constant @_QMtest_dinitE.dt.tseq() comdat(@__llvm_comdat::@_QMtest_dinitE.dt.tseq) {addr_space = 0 : i32} : i8
+// CHECK-NO-COMDAT:
llvm.mlir.global linkonce constant @_QMtest_dinitE.dt.tseq() {addr_space = 0 : i32} : i8 // CHECK-LABEL: llvm.func @embox1 // CHECK: %[[TYPE_CODE:.*]] = llvm.mlir.constant(42 : i32) : i32 // CHECK: %[[TYPE_CODE_I8:.*]] = llvm.trunc %[[TYPE_CODE]] : i32 to i8 diff --git a/flang/test/Fir/tbaa.fir b/flang/test/Fir/tbaa.fir --- a/flang/test/Fir/tbaa.fir +++ b/flang/test/Fir/tbaa.fir @@ -195,7 +195,7 @@ // CHECK: llvm.func @_FortranAioOutputDescriptor(!llvm.ptr, !llvm.ptr>, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)>>) -> i1 attributes {fir.io, fir.runtime, sym_visibility = "private"} // CHECK: llvm.func @_FortranAioEndIoStatement(!llvm.ptr) -> i32 attributes {fir.io, fir.runtime, sym_visibility = "private"} -// CHECK-LABEL: llvm.mlir.global linkonce constant @_QQcl.2E2F64756D6D792E66393000() {addr_space = 0 : i32} : !llvm.array<12 x i8> { +// CHECK-LABEL: llvm.mlir.global linkonce constant @_QQcl.2E2F64756D6D792E66393000() comdat(@__llvm_comdat::@_QQcl.2E2F64756D6D792E66393000) {addr_space = 0 : i32} : !llvm.array<12 x i8> { // CHECK: %[[VAL_0:.*]] = llvm.mlir.constant("./dummy.f90\00") : !llvm.array<12 x i8> // CHECK: llvm.return %[[VAL_0]] : !llvm.array<12 x i8> // CHECK: } diff --git a/flang/test/Lower/allocatable-assignment.f90 b/flang/test/Lower/allocatable-assignment.f90 --- a/flang/test/Lower/allocatable-assignment.f90 +++ b/flang/test/Lower/allocatable-assignment.f90 @@ -1242,7 +1242,7 @@ ! CHECK: } end subroutine -! CHECK: fir.global internal @[[error_message]] constant : !fir.char<1,76> { +! CHECK: fir.global linkonce @[[error_message]] constant : !fir.char<1,76> { ! CHECK: %[[msg:.*]] = fir.string_lit "array left hand side must be allocated when the right hand side is a scalar\00"(76) : !fir.char<1,76> ! CHECK: fir.has_value %[[msg:.*]] : !fir.char<1,76> ! CHECK: } diff --git a/flang/test/Lower/character-assignment.f90 b/flang/test/Lower/character-assignment.f90 --- a/flang/test/Lower/character-assignment.f90 +++ b/flang/test/Lower/character-assignment.f90 @@ -102,7 +102,7 @@ ! CHECK: return end subroutine -! CHECK-LABEL: fir.global internal @_QQcl.48656C6C6F20576F726C64 +! CHECK-LABEL: fir.global linkonce @_QQcl.48656C6C6F20576F726C64 ! CHECK: %[[lit:.*]] = fir.string_lit "Hello World"(11) : !fir.char<1,11> ! CHECK: fir.has_value %[[lit]] : !fir.char<1,11> ! CHECK: } diff --git a/flang/test/Lower/convert.f90 b/flang/test/Lower/convert.f90 --- a/flang/test/Lower/convert.f90 +++ b/flang/test/Lower/convert.f90 @@ -21,11 +21,11 @@ ! ALL: %[[VAL_8:.*]] = fir.insert_value %[[VAL_4]], %[[VAL_7]], [0 : index, 1 : index] : (!fir.array<1xtuple, !fir.ref>>, !fir.ref) -> !fir.array<1xtuple, !fir.ref>> ! ALL: fir.has_value %[[VAL_8]] : !fir.array<1xtuple, !fir.ref>> -! ALL: fir.global internal @[[FC_STR]] constant : !fir.char<1,13> { +! ALL: fir.global linkonce @[[FC_STR]] constant : !fir.char<1,13> { ! ALL: %[[VAL_0:.*]] = fir.string_lit "FORT_CONVERT\00"(13) : !fir.char<1,13> ! ALL: fir.has_value %[[VAL_0]] : !fir.char<1,13> -! ALL: fir.global internal @[[OPT_STR]] constant : !fir.char<1,[[OPT_STR_LEN]]> { +! ALL: fir.global linkonce @[[OPT_STR]] constant : !fir.char<1,[[OPT_STR_LEN]]> { ! UNKNOWN: %[[VAL_0:.*]] = fir.string_lit "UNKNOWN\00"([[OPT_STR_LEN]]) : !fir.char<1,[[OPT_STR_LEN]]> ! NATIVE: %[[VAL_0:.*]] = fir.string_lit "NATIVE\00"([[OPT_STR_LEN]]) : !fir.char<1,[[OPT_STR_LEN]]> ! 
LITTLE_ENDIAN: %[[VAL_0:.*]] = fir.string_lit "LITTLE_ENDIAN\00"([[OPT_STR_LEN]]) : !fir.char<1,[[OPT_STR_LEN]]> diff --git a/flang/test/Lower/global-format-strings.f90 b/flang/test/Lower/global-format-strings.f90 --- a/flang/test/Lower/global-format-strings.f90 +++ b/flang/test/Lower/global-format-strings.f90 @@ -8,7 +8,7 @@ ! CHECK: fir.address_of(@{{.*}}) : 1008 format('ok') end -! CHECK-LABEL: fir.global internal @_QQcl.28276F6B2729 constant +! CHECK-LABEL: fir.global linkonce @_QQcl.28276F6B2729 constant ! CHECK: %[[lit:.*]] = fir.string_lit "('ok')"(6) : !fir.char<1,6> ! CHECK: fir.has_value %[[lit]] : !fir.char<1,6> ! CHECK: } diff --git a/flang/test/Lower/io-statement-open-options.f90 b/flang/test/Lower/io-statement-open-options.f90 --- a/flang/test/Lower/io-statement-open-options.f90 +++ b/flang/test/Lower/io-statement-open-options.f90 @@ -15,6 +15,6 @@ close(unit) end subroutine -! CHECK: fir.global internal @[[be_str_name]] constant : !fir.char<1,10> { +! CHECK: fir.global linkonce @[[be_str_name]] constant : !fir.char<1,10> { ! CHECK: %[[be_str_lit:.*]] = fir.string_lit "BIG_ENDIAN"(10) : !fir.char<1,10> ! CHECK: fir.has_value %[[be_str_lit]] : !fir.char<1,10> diff --git a/flang/test/Lower/namelist.f90 b/flang/test/Lower/namelist.f90 --- a/flang/test/Lower/namelist.f90 +++ b/flang/test/Lower/namelist.f90 @@ -83,6 +83,6 @@ write(10, nml=mygroup) end - ! CHECK-DAG: fir.global internal @_QQcl.6A6A6A00 constant : !fir.char<1,4> - ! CHECK-DAG: fir.global internal @_QQcl.63636300 constant : !fir.char<1,4> - ! CHECK-DAG: fir.global internal @_QQcl.6E6E6E00 constant : !fir.char<1,4> + ! CHECK-DAG: fir.global linkonce @_QQcl.6A6A6A00 constant : !fir.char<1,4> + ! CHECK-DAG: fir.global linkonce @_QQcl.63636300 constant : !fir.char<1,4> + ! CHECK-DAG: fir.global linkonce @_QQcl.6E6E6E00 constant : !fir.char<1,4> diff --git a/flang/test/Lower/read-write-buffer.f90 b/flang/test/Lower/read-write-buffer.f90 --- a/flang/test/Lower/read-write-buffer.f90 +++ b/flang/test/Lower/read-write-buffer.f90 @@ -29,7 +29,7 @@ write (buffer, 10) "compiler" read (buffer, 10) greeting end -! CHECK-LABEL: fir.global internal @_QQcl.636F6D70696C6572 +! CHECK-LABEL: fir.global linkonce @_QQcl.636F6D70696C6572 ! CHECK: %[[lit:.*]] = fir.string_lit "compiler"(8) : !fir.char<1,8> ! CHECK: fir.has_value %[[lit]] : !fir.char<1,8> ! 
CHECK: }
diff --git a/flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp b/flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp
--- a/flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp
+++ b/flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp
@@ -311,7 +311,7 @@
   auto symbol = addrOp.getSymbol().getRootReference().getValue();
   auto global = builder.getNamedGlobal(symbol);
   EXPECT_EQ(
-      builder.createInternalLinkage().getValue(), global.getLinkName().value());
+      builder.createLinkOnceLinkage().getValue(), global.getLinkName().value());
   EXPECT_EQ(fir::CharacterType::get(builder.getContext(), 1, strValue.size()),
       global.getType());
diff --git a/libc/test/src/math/exhaustive/CMakeLists.txt b/libc/test/src/math/exhaustive/CMakeLists.txt
--- a/libc/test/src/math/exhaustive/CMakeLists.txt
+++ b/libc/test/src/math/exhaustive/CMakeLists.txt
@@ -15,9 +15,12 @@
   SRCS
     sqrtf_test.cpp
   DEPENDS
+    .exhaustive_test
     libc.include.math
     libc.src.math.sqrtf
     libc.src.__support.FPUtil.fp_bits
+  LINK_LIBRARIES
+    -lpthread
 )

 add_fp_unittest(
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.h
--- a/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.h
@@ -11,6 +11,7 @@

 #include "Plugins/SymbolFile/DWARF/DWARFIndex.h"
 #include "Plugins/SymbolFile/DWARF/HashedNameToDIE.h"
+#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"

 namespace lldb_private {
 class AppleDWARFIndex : public DWARFIndex {
@@ -20,11 +21,11 @@
                   DWARFDataExtractor apple_namespaces,
                   DWARFDataExtractor apple_types, DWARFDataExtractor apple_objc,
                   DWARFDataExtractor debug_str);

-  AppleDWARFIndex(
-      Module &module, std::unique_ptr<DWARFMappedHash::MemoryTable> apple_names,
-      std::unique_ptr<DWARFMappedHash::MemoryTable> apple_namespaces,
-      std::unique_ptr<DWARFMappedHash::MemoryTable> apple_types,
-      std::unique_ptr<DWARFMappedHash::MemoryTable> apple_objc)
+  AppleDWARFIndex(Module &module,
+                  std::unique_ptr<llvm::AppleAcceleratorTable> apple_names,
+                  std::unique_ptr<llvm::AppleAcceleratorTable> apple_namespaces,
+                  std::unique_ptr<llvm::AppleAcceleratorTable> apple_types,
+                  std::unique_ptr<DWARFMappedHash::MemoryTable> apple_objc)
       : DWARFIndex(module), m_apple_names_up(std::move(apple_names)),
         m_apple_namespaces_up(std::move(apple_namespaces)),
         m_apple_types_up(std::move(apple_types)),
@@ -62,10 +63,20 @@
   void Dump(Stream &s) override;

 private:
-  std::unique_ptr<DWARFMappedHash::MemoryTable> m_apple_names_up;
-  std::unique_ptr<DWARFMappedHash::MemoryTable> m_apple_namespaces_up;
-  std::unique_ptr<DWARFMappedHash::MemoryTable> m_apple_types_up;
+  std::unique_ptr<llvm::AppleAcceleratorTable> m_apple_names_up;
+  std::unique_ptr<llvm::AppleAcceleratorTable> m_apple_namespaces_up;
+  std::unique_ptr<llvm::AppleAcceleratorTable> m_apple_types_up;
   std::unique_ptr<DWARFMappedHash::MemoryTable> m_apple_objc_up;
+
+  /// Search for entries whose name is `name` in `table`, calling `callback`
+  /// for each match. If `search_for_tag` is provided, ignore entries whose
+  /// tag is not `search_for_tag`. If `search_for_qualhash` is provided,
+  /// ignore entries whose qualified name hash does not match
+  /// `search_for_qualhash`. If `callback` returns false for an entry, the
+  /// search is interrupted.
+  void SearchFor(const llvm::AppleAcceleratorTable &table, llvm::StringRef name,
+                 llvm::function_ref<bool(DWARFDIE die)> callback,
+                 std::optional<dw_tag_t> search_for_tag = std::nullopt,
+                 std::optional<uint32_t> search_for_qualhash = std::nullopt);
 };

 } // namespace lldb_private
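A hypothetical call site for SearchFor, to make the filter semantics concrete
(the class name and hash input here are invented for illustration):

    // Visit DW_TAG_class_type entries named "Foo" whose qualified-name hash
    // matches "ns::Foo"; returning true from the callback keeps iterating.
    SearchFor(*m_apple_types_up, "Foo",
              [](DWARFDIE die) { return true; },
              /*search_for_tag=*/DW_TAG_class_type,
              /*search_for_qualhash=*/llvm::djbHash("ns::Foo"));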
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp
--- a/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp
@@ -22,27 +22,35 @@
     Module &module, DWARFDataExtractor apple_names,
     DWARFDataExtractor apple_namespaces, DWARFDataExtractor apple_types,
     DWARFDataExtractor apple_objc, DWARFDataExtractor debug_str) {
-  auto apple_names_table_up = std::make_unique<DWARFMappedHash::MemoryTable>(
-      apple_names, debug_str, ".apple_names");
-  if (!apple_names_table_up->IsValid())
-    apple_names_table_up.reset();
+
+  llvm::DataExtractor llvm_debug_str = debug_str.GetAsLLVM();
+
+  auto apple_names_table_up = std::make_unique<llvm::AppleAcceleratorTable>(
+      apple_names.GetAsLLVMDWARF(), llvm_debug_str);

   auto apple_namespaces_table_up =
-      std::make_unique<DWARFMappedHash::MemoryTable>(
-          apple_namespaces, debug_str, ".apple_namespaces");
-  if (!apple_namespaces_table_up->IsValid())
-    apple_namespaces_table_up.reset();
+      std::make_unique<llvm::AppleAcceleratorTable>(
+          apple_namespaces.GetAsLLVMDWARF(), llvm_debug_str);

-  auto apple_types_table_up = std::make_unique<DWARFMappedHash::MemoryTable>(
-      apple_types, debug_str, ".apple_types");
-  if (!apple_types_table_up->IsValid())
-    apple_types_table_up.reset();
+  auto apple_types_table_up = std::make_unique<llvm::AppleAcceleratorTable>(
+      apple_types.GetAsLLVMDWARF(), llvm_debug_str);

   auto apple_objc_table_up = std::make_unique<DWARFMappedHash::MemoryTable>(
       apple_objc, debug_str, ".apple_objc");
   if (!apple_objc_table_up->IsValid())
     apple_objc_table_up.reset();

+  auto extract_and_check = [](auto &TablePtr) {
+    if (auto E = TablePtr->extract()) {
+      llvm::consumeError(std::move(E));
+      TablePtr.reset();
+    }
+  };
+
+  extract_and_check(apple_names_table_up);
+  extract_and_check(apple_namespaces_table_up);
+  extract_and_check(apple_types_table_up);
+
   if (apple_names_table_up || apple_namespaces_table_up ||
       apple_types_table_up || apple_objc_table_up)
     return std::make_unique<AppleDWARFIndex>(
@@ -53,13 +61,76 @@
   return nullptr;
 }

+/// Returns true if `tag` is a class_type or structure_type tag.
+static bool IsClassOrStruct(dw_tag_t tag) {
+  return tag == DW_TAG_class_type || tag == DW_TAG_structure_type;
+}
+
+/// Returns true if `entry` has an extractable DW_ATOM_qual_name_hash and it
+/// matches `expected_hash`.
+static bool
+EntryHasMatchingQualhash(const llvm::AppleAcceleratorTable::Entry &entry,
+                         uint32_t expected_hash) {
+  std::optional<llvm::DWARFFormValue> form_value =
+      entry.lookup(dwarf::DW_ATOM_qual_name_hash);
+  if (!form_value)
+    return false;
+  std::optional<uint64_t> hash = form_value->getAsUnsignedConstant();
+  return hash && (*hash == expected_hash);
+}
+
+/// Returns true if `entry` has an extractable DW_ATOM_die_tag and it matches
+/// `expected_tag`. We also consider it a match if the tags are different but
+/// both in the set of {DW_TAG_class_type, DW_TAG_structure_type}.
+static bool EntryHasMatchingTag(const llvm::AppleAcceleratorTable::Entry &entry,
+                                dw_tag_t expected_tag) {
+  std::optional<llvm::DWARFFormValue> form_value =
+      entry.lookup(dwarf::DW_ATOM_die_tag);
+  if (!form_value)
+    return false;
+  std::optional<uint64_t> maybe_tag = form_value->getAsUnsignedConstant();
+  if (!maybe_tag)
+    return false;
+  auto tag = static_cast<dw_tag_t>(*maybe_tag);
+  return tag == expected_tag ||
+         (IsClassOrStruct(tag) && IsClassOrStruct(expected_tag));
+}
+
+/// Returns true if `entry` has an extractable DW_ATOM_type_flags and the flag
+/// "DW_FLAG_type_implementation" is set.
+static bool
+HasImplementationFlag(const llvm::AppleAcceleratorTable::Entry &entry) {
+  std::optional<llvm::DWARFFormValue> form_value =
+      entry.lookup(dwarf::DW_ATOM_type_flags);
+  if (!form_value)
+    return false;
+  std::optional<uint64_t> Flags = form_value->getAsUnsignedConstant();
+  return Flags &&
+         (*Flags & llvm::dwarf::AcceleratorTable::DW_FLAG_type_implementation);
+}
+
+void AppleDWARFIndex::SearchFor(const llvm::AppleAcceleratorTable &table,
+                                llvm::StringRef name,
+                                llvm::function_ref<bool(DWARFDIE die)> callback,
+                                std::optional<dw_tag_t> search_for_tag,
+                                std::optional<uint32_t> search_for_qualhash) {
+  auto converted_cb = DIERefCallback(callback, name);
+  for (const auto &entry : table.equal_range(name)) {
+    if (search_for_qualhash &&
+        !EntryHasMatchingQualhash(entry, *search_for_qualhash))
+      continue;
+    if (search_for_tag && !EntryHasMatchingTag(entry, *search_for_tag))
+      continue;
+    if (!converted_cb(entry))
+      break;
+  }
+}
+
 void AppleDWARFIndex::GetGlobalVariables(
     ConstString basename, llvm::function_ref<bool(DWARFDIE die)> callback) {
   if (!m_apple_names_up)
     return;
-  m_apple_names_up->FindByName(
-      basename.GetStringRef(),
-      DIERefCallback(callback, basename.GetStringRef()));
+  SearchFor(*m_apple_names_up, basename, callback);
 }

 void AppleDWARFIndex::GetGlobalVariables(
@@ -68,11 +139,13 @@
   if (!m_apple_names_up)
     return;

-  DWARFMappedHash::DIEInfoArray hash_data;
-  m_apple_names_up->AppendAllDIEsThatMatchingRegex(regex, hash_data);
-  // This is not really the DIE name.
-  DWARFMappedHash::ExtractDIEArray(hash_data,
-                                   DIERefCallback(callback, regex.GetText()));
+  DIERefCallbackImpl converted_cb = DIERefCallback(callback, regex.GetText());
+
+  for (const auto &entry : m_apple_names_up->entries())
+    if (std::optional<llvm::StringRef> name = entry.readName();
+        name && Mangled(*name).NameMatches(regex))
+      if (!converted_cb(entry.BaseEntry))
+        return;
 }

 void AppleDWARFIndex::GetGlobalVariables(
@@ -81,11 +154,18 @@
     return;
   const DWARFUnit &non_skeleton_cu = cu.GetNonSkeletonUnit();
-  DWARFMappedHash::DIEInfoArray hash_data;
-  m_apple_names_up->AppendAllDIEsInRange(non_skeleton_cu.GetOffset(),
-                                         non_skeleton_cu.GetNextUnitOffset(),
-                                         hash_data);
-  DWARFMappedHash::ExtractDIEArray(hash_data, DIERefCallback(callback));
+  dw_offset_t lower_bound = non_skeleton_cu.GetOffset();
+  dw_offset_t upper_bound = non_skeleton_cu.GetNextUnitOffset();
+  auto is_in_range = [lower_bound, upper_bound](std::optional<uint64_t> val) {
+    return val.has_value() && *val >= lower_bound && *val < upper_bound;
+  };
+
+  DIERefCallbackImpl converted_cb = DIERefCallback(callback);
+  for (auto entry : m_apple_names_up->entries()) {
+    if (is_in_range(entry.BaseEntry.getDIESectionOffset()))
+      if (!converted_cb(entry.BaseEntry))
+        return;
+  }
 }

 void AppleDWARFIndex::GetObjCMethods(
@@ -102,18 +182,32 @@
     llvm::function_ref<bool(DWARFDIE die)> callback) {
   if (!m_apple_types_up)
     return;
-  m_apple_types_up->FindCompleteObjCClassByName(
-      class_name.GetStringRef(),
-      DIERefCallback(callback, class_name.GetStringRef()),
-      must_be_implementation);
+
+  llvm::SmallVector<DIERef> decl_dies;
+  auto converted_cb = DIERefCallback(callback, class_name);
+
+  for (const auto &entry : m_apple_types_up->equal_range(class_name)) {
+    if (HasImplementationFlag(entry)) {
+      converted_cb(entry);
+      return;
+    }
+
+    decl_dies.emplace_back(std::nullopt, DIERef::Section::DebugInfo,
+                           *entry.getDIESectionOffset());
+  }
+
+  if (must_be_implementation)
+    return;
+  for (DIERef ref : decl_dies)
+    if (!converted_cb(ref))
+      return;
 }

 void AppleDWARFIndex::GetTypes(
     ConstString name, llvm::function_ref<bool(DWARFDIE die)> callback) {
   if (!m_apple_types_up)
     return;
-  m_apple_types_up->FindByName(name.GetStringRef(),
-                               DIERefCallback(callback, name.GetStringRef()));
+  SearchFor(*m_apple_types_up, name, callback);
 }

 void AppleDWARFIndex::GetTypes(
@@ -123,82 +217,77 @@
     return;

   Log *log = GetLog(DWARFLog::TypeCompletion | DWARFLog::Lookups);
-  const bool has_tag = m_apple_types_up->GetHeader().header_data.ContainsAtom(
-      DWARFMappedHash::eAtomTypeTag);
-  const bool has_qualified_name_hash =
-      m_apple_types_up->GetHeader().header_data.ContainsAtom(
-          DWARFMappedHash::eAtomTypeQualNameHash);
-
-  const ConstString type_name(context[0].name);
-  const dw_tag_t tag = context[0].tag;
-  if (has_tag && has_qualified_name_hash) {
-    const char *qualified_name = context.GetQualifiedName();
-    const uint32_t qualified_name_hash = llvm::djbHash(qualified_name);
+  const bool entries_have_tag =
+      m_apple_types_up->containsAtomType(DW_ATOM_die_tag);
+  const bool entries_have_qual_hash =
+      m_apple_types_up->containsAtomType(DW_ATOM_qual_name_hash);
+
+  llvm::StringRef expected_name = context[0].name;
+
+  if (entries_have_tag && entries_have_qual_hash) {
+    const dw_tag_t expected_tag = context[0].tag;
+    const uint32_t expected_qualname_hash =
+        llvm::djbHash(context.GetQualifiedName());
     if (log)
       m_module.LogMessage(log, "FindByNameAndTagAndQualifiedNameHash()");
-    m_apple_types_up->FindByNameAndTagAndQualifiedNameHash(
-        type_name.GetStringRef(), tag, qualified_name_hash,
-        DIERefCallback(callback, type_name.GetStringRef()));
+    SearchFor(*m_apple_types_up, expected_name, callback, expected_tag,
+              expected_qualname_hash);
     return;
   }

-  if (has_tag) {
-    // When searching for a scoped type (for example,
-    // "std::vector<int>::const_iterator") searching for the innermost
-    // name alone ("const_iterator") could yield many false
-    // positives. By searching for the parent type ("vector")
-    // first we can avoid extracting type DIEs from object files that
-    // would fail the filter anyway.
-    if (!has_qualified_name_hash && (context.GetSize() > 1) &&
-        (context[1].tag == DW_TAG_class_type ||
-         context[1].tag == DW_TAG_structure_type)) {
-      if (m_apple_types_up->FindByName(context[1].name,
-                                       [&](DIERef ref) { return false; }))
-        return;
-    }
-
-    if (log)
-      m_module.LogMessage(log, "FindByNameAndTag()");
-    m_apple_types_up->FindByNameAndTag(
-        type_name.GetStringRef(), tag,
-        DIERefCallback(callback, type_name.GetStringRef()));
+  // Historically, if there are no tags, we also ignore qual_hash (why?)
+  if (!entries_have_tag) {
+    SearchFor(*m_apple_types_up, expected_name, callback);
     return;
   }

-  m_apple_types_up->FindByName(
-      type_name.GetStringRef(),
-      DIERefCallback(callback, type_name.GetStringRef()));
+  // We have a tag but no qual hash.
+
+  // When searching for a scoped type (for example,
+  // "std::vector<int>::const_iterator") searching for the innermost
+  // name alone ("const_iterator") could yield many false
+  // positives. By searching for the parent type ("vector")
+  // first we can avoid extracting type DIEs from object files that
+  // would fail the filter anyway.
+  if ((context.GetSize() > 1) && IsClassOrStruct(context[1].tag))
+    if (m_apple_types_up->equal_range(context[1].name).empty())
+      return;
+
+  if (log)
+    m_module.LogMessage(log, "FindByNameAndTag()");
+  const dw_tag_t expected_tag = context[0].tag;
+  SearchFor(*m_apple_types_up, expected_name, callback, expected_tag);
+  return;
 }

 void AppleDWARFIndex::GetNamespaces(
     ConstString name, llvm::function_ref<bool(DWARFDIE die)> callback) {
   if (!m_apple_namespaces_up)
     return;
-  m_apple_namespaces_up->FindByName(
-      name.GetStringRef(), DIERefCallback(callback, name.GetStringRef()));
+  SearchFor(*m_apple_namespaces_up, name, callback);
 }

 void AppleDWARFIndex::GetFunctions(
     const Module::LookupInfo &lookup_info, SymbolFileDWARF &dwarf,
     const CompilerDeclContext &parent_decl_ctx,
     llvm::function_ref<bool(DWARFDIE die)> callback) {
+  if (!m_apple_names_up)
+    return;
+
   ConstString name = lookup_info.GetLookupName();
-  m_apple_names_up->FindByName(name.GetStringRef(), [&](DIERef die_ref) {
-    return ProcessFunctionDIE(lookup_info, die_ref, dwarf, parent_decl_ctx,
-                              callback);
-  });
+  for (const auto &entry : m_apple_names_up->equal_range(name)) {
+    DIERef die_ref(std::nullopt, DIERef::Section::DebugInfo,
+                   *entry.getDIESectionOffset());
+    if (!ProcessFunctionDIE(lookup_info, die_ref, dwarf, parent_decl_ctx,
+                            callback))
+      return;
+  }
 }

 void AppleDWARFIndex::GetFunctions(
     const RegularExpression &regex,
     llvm::function_ref<bool(DWARFDIE die)> callback) {
-  if (!m_apple_names_up)
-    return;
-
-  DWARFMappedHash::DIEInfoArray hash_data;
-  m_apple_names_up->AppendAllDIEsThatMatchingRegex(regex, hash_data);
-  DWARFMappedHash::ExtractDIEArray(hash_data,
-                                   DIERefCallback(callback, regex.GetText()));
+  return GetGlobalVariables(regex, callback);
 }

 void AppleDWARFIndex::Dump(Stream &s) {
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h
@@ -12,6 +12,7 @@
 #include "Plugins/SymbolFile/DWARF/DIERef.h"
"Plugins/SymbolFile/DWARF/DIERef.h" #include "Plugins/SymbolFile/DWARF/DWARFDIE.h" #include "Plugins/SymbolFile/DWARF/DWARFFormValue.h" +#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" #include "lldb/Core/Module.h" #include "lldb/Target/Statistics.h" @@ -85,6 +86,7 @@ llvm::function_ref callback, llvm::StringRef name); bool operator()(DIERef ref) const; + bool operator()(const llvm::AppleAcceleratorTable::Entry &entry) const; private: const DWARFIndex &m_index; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp @@ -99,6 +99,12 @@ return true; } +bool DWARFIndex::DIERefCallbackImpl::operator()( + const llvm::AppleAcceleratorTable::Entry &entry) const { + return this->operator()(DIERef(std::nullopt, DIERef::Section::DebugInfo, + *entry.getDIESectionOffset())); +} + void DWARFIndex::ReportInvalidDIERef(DIERef ref, llvm::StringRef name) const { m_module.ReportErrorIfModifyDetected( "the DWARF debug information has been modified (accelerator table had " diff --git a/lldb/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.h b/lldb/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.h --- a/lldb/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.h @@ -132,35 +132,10 @@ bool ReadHashData(uint32_t hash_data_offset, HashData &hash_data) const override; - void - AppendAllDIEsThatMatchingRegex(const lldb_private::RegularExpression ®ex, - DIEInfoArray &die_info_array) const; - - void AppendAllDIEsInRange(const uint32_t die_offset_start, - const uint32_t die_offset_end, - DIEInfoArray &die_info_array) const; - bool FindByName(llvm::StringRef name, llvm::function_ref callback); - void FindByNameAndTag(llvm::StringRef name, const dw_tag_t tag, - llvm::function_ref callback); - - void FindByNameAndTagAndQualifiedNameHash( - llvm::StringRef name, const dw_tag_t tag, - const uint32_t qualified_name_hash, - llvm::function_ref callback); - - void - FindCompleteObjCClassByName(llvm::StringRef name, - llvm::function_ref callback, - bool must_be_implementation); - protected: - Result AppendHashDataForRegularExpression( - const lldb_private::RegularExpression ®ex, - lldb::offset_t *hash_data_offset_ptr, Pair &pair) const; - void FindByName(llvm::StringRef name, DIEInfoArray &die_info_array); Result GetHashDataForName(llvm::StringRef name, @@ -176,25 +151,6 @@ llvm::function_ref callback); protected: - static void ExtractDIEArray(const DIEInfoArray &die_info_array, - const dw_tag_t tag, - llvm::function_ref callback); - - static void ExtractDIEArray(const DIEInfoArray &die_info_array, - const dw_tag_t tag, - const uint32_t qualified_name_hash, - llvm::function_ref callback); - - static void - ExtractClassOrStructDIEArray(const DIEInfoArray &die_info_array, - bool return_implementation_only_if_available, - llvm::function_ref callback); - - static void - ExtractTypesFromDIEArray(const DIEInfoArray &die_info_array, - uint32_t type_flag_mask, uint32_t type_flag_value, - llvm::function_ref callback); - static const char *GetAtomTypeName(uint16_t atom); }; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.cpp b/lldb/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.cpp --- a/lldb/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.cpp @@ -23,92 +23,6 @@ return true; } -void DWARFMappedHash::ExtractDIEArray( - 
const DIEInfoArray &die_info_array, const dw_tag_t tag, - llvm::function_ref callback) { - if (tag == 0) { - ExtractDIEArray(die_info_array, callback); - return; - } - - const size_t count = die_info_array.size(); - for (size_t i = 0; i < count; ++i) { - const dw_tag_t die_tag = die_info_array[i].tag; - bool tag_matches = die_tag == 0 || tag == die_tag; - if (!tag_matches) { - if (die_tag == DW_TAG_class_type || die_tag == DW_TAG_structure_type) - tag_matches = tag == DW_TAG_structure_type || tag == DW_TAG_class_type; - } - if (tag_matches) { - if (!callback(DIERef(die_info_array[i]))) - return; - } - } -} - -void DWARFMappedHash::ExtractDIEArray( - const DIEInfoArray &die_info_array, const dw_tag_t tag, - const uint32_t qualified_name_hash, - llvm::function_ref callback) { - if (tag == 0) { - ExtractDIEArray(die_info_array, callback); - return; - } - - const size_t count = die_info_array.size(); - for (size_t i = 0; i < count; ++i) { - if (qualified_name_hash != die_info_array[i].qualified_name_hash) - continue; - const dw_tag_t die_tag = die_info_array[i].tag; - bool tag_matches = die_tag == 0 || tag == die_tag; - if (!tag_matches) { - if (die_tag == DW_TAG_class_type || die_tag == DW_TAG_structure_type) - tag_matches = tag == DW_TAG_structure_type || tag == DW_TAG_class_type; - } - if (tag_matches) { - if (!callback(DIERef(die_info_array[i]))) - return; - } - } -} - -void DWARFMappedHash::ExtractClassOrStructDIEArray( - const DIEInfoArray &die_info_array, - bool return_implementation_only_if_available, - llvm::function_ref callback) { - const size_t count = die_info_array.size(); - for (size_t i = 0; i < count; ++i) { - const dw_tag_t die_tag = die_info_array[i].tag; - if (!(die_tag == 0 || die_tag == DW_TAG_class_type || - die_tag == DW_TAG_structure_type)) - continue; - bool is_implementation = - (die_info_array[i].type_flags & eTypeFlagClassIsImplementation) != 0; - if (is_implementation != return_implementation_only_if_available) - continue; - if (return_implementation_only_if_available) { - // We found the one true definition for this class, so only return - // that - callback(DIERef(die_info_array[i])); - return; - } - if (!callback(DIERef(die_info_array[i]))) - return; - } -} - -void DWARFMappedHash::ExtractTypesFromDIEArray( - const DIEInfoArray &die_info_array, uint32_t type_flag_mask, - uint32_t type_flag_value, llvm::function_ref callback) { - const size_t count = die_info_array.size(); - for (size_t i = 0; i < count; ++i) { - if ((die_info_array[i].type_flags & type_flag_mask) == type_flag_value) { - if (!callback(DIERef(die_info_array[i]))) - return; - } - } -} - const char *DWARFMappedHash::GetAtomTypeName(uint16_t atom) { switch (atom) { case eAtomTypeNULL: @@ -404,131 +318,6 @@ } } -DWARFMappedHash::MemoryTable::Result -DWARFMappedHash::MemoryTable::AppendHashDataForRegularExpression( - const lldb_private::RegularExpression ®ex, - lldb::offset_t *hash_data_offset_ptr, Pair &pair) const { - pair.key = m_data.GetU32(hash_data_offset_ptr); - // If the key is zero, this terminates our chain of HashData objects for this - // hash value. - if (pair.key == 0) - return eResultEndOfHashData; - - // There definitely should be a string for this string offset, if there - // isn't, there is something wrong, return and error. 
- const char *strp_cstr = m_string_table.PeekCStr(pair.key); - if (strp_cstr == nullptr) - return eResultError; - - const uint32_t count = m_data.GetU32(hash_data_offset_ptr); - const size_t min_total_hash_data_size = - count * m_header.header_data.GetMinimumHashDataByteSize(); - if (count > 0 && m_data.ValidOffsetForDataOfSize(*hash_data_offset_ptr, - min_total_hash_data_size)) { - // The name in the name table may be a mangled name, in which case we - // should also compare against the demangled version. The simplest way to - // do that is to use the Mangled class: - lldb_private::Mangled mangled_name((llvm::StringRef(strp_cstr))); - const bool match = mangled_name.NameMatches(regex); - - if (!match && m_header.header_data.HashDataHasFixedByteSize()) { - // If the regex doesn't match and we have fixed size data, we can just - // add the total byte size of all HashData objects to the hash data - // offset and be done... - *hash_data_offset_ptr += min_total_hash_data_size; - } else { - // If the string does match, or we don't have fixed size data then we - // need to read the hash data as a stream. If the string matches we also - // append all HashData objects to the value array. - for (uint32_t i = 0; i < count; ++i) { - DIEInfo die_info; - if (m_header.Read(m_data, hash_data_offset_ptr, die_info)) { - // Only happened if the HashData of the string matched... - if (match) - pair.value.push_back(die_info); - } else { - // Something went wrong while reading the data - *hash_data_offset_ptr = UINT32_MAX; - return eResultError; - } - } - } - // Return the correct response depending on if the string matched or not... - if (match) { - // The key (cstring) matches and we have lookup results! - return eResultKeyMatch; - } else { - // The key doesn't match, this function will get called again for the - // next key/value or the key terminator which in our case is a zero - // .debug_str offset. - return eResultKeyMismatch; - } - } else { - *hash_data_offset_ptr = UINT32_MAX; - return eResultError; - } -} - -void DWARFMappedHash::MemoryTable::AppendAllDIEsThatMatchingRegex( - const lldb_private::RegularExpression ®ex, - DIEInfoArray &die_info_array) const { - const uint32_t hash_count = m_header.hashes_count; - Pair pair; - for (uint32_t offset_idx = 0; offset_idx < hash_count; ++offset_idx) { - lldb::offset_t hash_data_offset = GetHashDataOffset(offset_idx); - while (hash_data_offset != UINT32_MAX) { - const lldb::offset_t prev_hash_data_offset = hash_data_offset; - Result hash_result = - AppendHashDataForRegularExpression(regex, &hash_data_offset, pair); - if (prev_hash_data_offset == hash_data_offset) - break; - - // Check the result of getting our hash data. - switch (hash_result) { - case eResultKeyMatch: - case eResultKeyMismatch: - // Whether we matches or not, it doesn't matter, we keep looking. 
- break; - - case eResultEndOfHashData: - case eResultError: - hash_data_offset = UINT32_MAX; - break; - } - } - } - die_info_array.swap(pair.value); -} - -void DWARFMappedHash::MemoryTable::AppendAllDIEsInRange( - const uint32_t die_offset_start, const uint32_t die_offset_end, - DIEInfoArray &die_info_array) const { - const uint32_t hash_count = m_header.hashes_count; - for (uint32_t offset_idx = 0; offset_idx < hash_count; ++offset_idx) { - bool done = false; - lldb::offset_t hash_data_offset = GetHashDataOffset(offset_idx); - while (!done && hash_data_offset != UINT32_MAX) { - KeyType key = m_data.GetU32(&hash_data_offset); - // If the key is zero, this terminates our chain of HashData objects for - // this hash value. - if (key == 0) - break; - - const uint32_t count = m_data.GetU32(&hash_data_offset); - for (uint32_t i = 0; i < count; ++i) { - DIEInfo die_info; - if (m_header.Read(m_data, &hash_data_offset, die_info)) { - if (die_info.die_offset == 0) - done = true; - if (die_offset_start <= die_info.die_offset && - die_info.die_offset < die_offset_end) - die_info_array.push_back(die_info); - } - } - } - } -} - bool DWARFMappedHash::MemoryTable::FindByName( llvm::StringRef name, llvm::function_ref callback) { if (name.empty()) @@ -539,56 +328,6 @@ return DWARFMappedHash::ExtractDIEArray(die_info_array, callback); } -void DWARFMappedHash::MemoryTable::FindByNameAndTag( - llvm::StringRef name, const dw_tag_t tag, - llvm::function_ref callback) { - DIEInfoArray die_info_array; - FindByName(name, die_info_array); - DWARFMappedHash::ExtractDIEArray(die_info_array, tag, callback); -} - -void DWARFMappedHash::MemoryTable::FindByNameAndTagAndQualifiedNameHash( - llvm::StringRef name, const dw_tag_t tag, - const uint32_t qualified_name_hash, - llvm::function_ref callback) { - DIEInfoArray die_info_array; - FindByName(name, die_info_array); - DWARFMappedHash::ExtractDIEArray(die_info_array, tag, qualified_name_hash, - callback); -} - -void DWARFMappedHash::MemoryTable::FindCompleteObjCClassByName( - llvm::StringRef name, llvm::function_ref callback, - bool must_be_implementation) { - DIEInfoArray die_info_array; - FindByName(name, die_info_array); - if (must_be_implementation && - GetHeader().header_data.ContainsAtom(eAtomTypeTypeFlags)) { - // If we have two atoms, then we have the DIE offset and the type flags - // so we can find the objective C class efficiently. - DWARFMappedHash::ExtractTypesFromDIEArray( - die_info_array, UINT32_MAX, eTypeFlagClassIsImplementation, callback); - return; - } - // We don't only want the one true definition, so try and see what we can - // find, and only return class or struct DIEs. If we do have the full - // implementation, then return it alone, else return all possible - // matches. - bool found_implementation = false; - DWARFMappedHash::ExtractClassOrStructDIEArray( - die_info_array, true /*return_implementation_only_if_available*/, - [&](DIERef ref) { - found_implementation = true; - // Here the return value does not matter as we are called at most once. 
-        return callback(ref);
-      });
-  if (found_implementation)
-    return;
-  DWARFMappedHash::ExtractClassOrStructDIEArray(
-      die_info_array, false /*return_implementation_only_if_available*/,
-      callback);
-}
-
 void DWARFMappedHash::MemoryTable::FindByName(llvm::StringRef name,
                                               DIEInfoArray &die_info_array) {
   if (name.empty())
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
@@ -314,6 +314,13 @@
   /// Return the Atom description, which can be used to interpret the raw values
   /// of the Accelerator Entries in this table.
   ArrayRef<std::pair<HeaderData::AtomType, HeaderData::Form>> getAtomsDesc();
+
+  /// Returns true iff `AtomTy` is one of the atoms available in the entries of
+  /// this table.
+  bool containsAtomType(HeaderData::AtomType AtomTy) const {
+    return is_contained(make_first_range(HdrData.Atoms), AtomTy);
+  }
+
   bool validateForms();

   /// Return information related to the DWARF DIE we're looking for when
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -1279,7 +1279,7 @@
   GV->setUnnamedAddr(UnnamedAddr);

   if (GVal) {
-    if (GVal->getType() != Ty->getPointerTo(AddrSpace))
+    if (GVal->getAddressSpace() != AddrSpace)
       return error(
           TyLoc,
           "forward reference and definition of global have different types");
diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp
--- a/llvm/lib/Support/CommandLine.cpp
+++ b/llvm/lib/Support/CommandLine.cpp
@@ -1289,8 +1289,9 @@
   }

   ErrorOr<llvm::vfs::Status> Res = FS->status(FName);
+  std::error_code EC;
   if (!Res || !Res->exists()) {
-    std::error_code EC = Res.getError();
+    EC = Res.getError();
     if (!InConfigFile) {
       // If the specified file does not exist, leave '@file' unexpanded, as
       // libiberty does.
@@ -1306,6 +1307,11 @@
   }

   const llvm::vfs::Status &FileStatus = Res.get();
+  if (FileStatus.isDirectory()) {
+    EC = std::make_error_code(std::errc::is_a_directory);
+    return createStringError(EC, Twine("cannot open file '") + FName +
+                                     "': " + EC.message());
+  }
   auto IsEquivalent =
       [FileStatus, this](const ResponseFileRecord &RFile) -> ErrorOr<bool> {
     ErrorOr<llvm::vfs::Status> RHS = FS->status(RFile.File);
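The directory check guards a real failure mode: on POSIX systems, opening a
directory read-only usually succeeds and only the subsequent read fails, so a
response file like @mydir used to surface a confusing downstream error. A
standalone sketch of that behavior (illustrative only, not LLVM code):

    #include <cstdio>

    int main() {
      std::FILE *f = std::fopen(".", "r"); // opening a directory may succeed
      char buf[16];
      if (f && std::fread(buf, 1, sizeof buf, f) == 0) {
        // The read fails with EISDIR; checking vfs::Status::isDirectory() up
        // front turns this into a clear "is a directory" error instead.
        std::perror("fread");
      }
      if (f)
        std::fclose(f);
      return 0;
    }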
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -18616,6 +18616,10 @@
   } else
     return SDValue();

+  // If the shift amount is zero, remove the shift intrinsic. This does not
+  // apply to sqshlu, which saturates the signed input even for a zero shift.
+  if (ShiftAmount == 0 && IID != Intrinsic::aarch64_neon_sqshlu)
+    return N->getOperand(1);
+
   unsigned Opcode;
   bool IsRightShift;
   switch (IID) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -512,10 +512,8 @@
   case Instruction::Load: {
     Value *Ptr = cast<LoadInst>(Inst)->getPointerOperand();
     Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
-    Type *VecPtrTy = VectorTy->getPointerTo(Alloca.getAddressSpace());
-    Value *BitCast = Builder.CreateBitCast(&Alloca, VecPtrTy);
     Value *VecValue =
-        Builder.CreateAlignedLoad(VectorTy, BitCast, Alloca.getAlign());
+        Builder.CreateAlignedLoad(VectorTy, &Alloca, Alloca.getAlign());
     Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index);
     if (Inst->getType() != VecEltTy)
       ExtractElement =
@@ -528,15 +526,13 @@
     StoreInst *SI = cast<StoreInst>(Inst);
     Value *Ptr = SI->getPointerOperand();
     Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
-    Type *VecPtrTy = VectorTy->getPointerTo(Alloca.getAddressSpace());
-    Value *BitCast = Builder.CreateBitCast(&Alloca, VecPtrTy);
     Value *VecValue =
-        Builder.CreateAlignedLoad(VectorTy, BitCast, Alloca.getAlign());
+        Builder.CreateAlignedLoad(VectorTy, &Alloca, Alloca.getAlign());
     Value *Elt = SI->getValueOperand();
     if (Elt->getType() != VecEltTy)
       Elt = Builder.CreateBitOrPointerCast(Elt, VecEltTy);
     Value *NewVecValue = Builder.CreateInsertElement(VecValue, Elt, Index);
-    Builder.CreateAlignedStore(NewVecValue, BitCast, Alloca.getAlign());
+    Builder.CreateAlignedStore(NewVecValue, &Alloca, Alloca.getAlign());
     Inst->eraseFromParent();
     break;
   }
@@ -556,12 +552,10 @@
         Mask.push_back(Idx);
       }
     }
-    Type *VecPtrTy = VectorTy->getPointerTo(Alloca.getAddressSpace());
-    Value *BitCast = Builder.CreateBitCast(&Alloca, VecPtrTy);
     Value *VecValue =
-        Builder.CreateAlignedLoad(VectorTy, BitCast, Alloca.getAlign());
+        Builder.CreateAlignedLoad(VectorTy, &Alloca, Alloca.getAlign());
     Value *NewVecValue = Builder.CreateShuffleVector(VecValue, Mask);
-    Builder.CreateAlignedStore(NewVecValue, BitCast, Alloca.getAlign());
+    Builder.CreateAlignedStore(NewVecValue, &Alloca, Alloca.getAlign());

     Inst->eraseFromParent();
   } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) {
diff --git a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
--- a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -605,6 +605,10 @@
   auto MJTI = MF->getJumpTableInfo();
   const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();

+  // Only inline jump tables are placed in the function.
+  if (MJTI->getEntryKind() != MachineJumpTableInfo::EK_Inline)
+    return;
+
   MachineBasicBlock *LastCorrectlyNumberedBB = nullptr;
   for (MachineBasicBlock &MBB : *MF) {
     auto MI = MBB.getLastNonDebugInstr();
@@ -777,6 +781,11 @@
   // Compute block offsets and known bits.
   BBUtils->adjustBBOffsetsAfter(&MF->front());

+  // We only care about jump table instructions when jump tables are inline.
+  MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+  bool InlineJumpTables =
+      MJTI && MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline;
+
   // Now go back through the instructions and build up our data structures.
   for (MachineBasicBlock &MBB : *MF) {
     // If this block doesn't fall through into the next MBB, then this is
@@ -799,7 +808,8 @@
         continue;  // Ignore other JT branches
       case ARM::t2BR_JT:
       case ARM::tBR_JTr:
-        T2JumpTables.push_back(&I);
+        if (InlineJumpTables)
+          T2JumpTables.push_back(&I);
         continue;   // Does not get an entry in ImmBranches
       case ARM::Bcc:
         isCond = true;
@@ -846,7 +856,8 @@
       // Scan the instructions for constant pool operands.
       for (unsigned op = 0, e = I.getNumOperands(); op != e; ++op)
-        if (I.getOperand(op).isCPI() || I.getOperand(op).isJTI()) {
+        if (I.getOperand(op).isCPI() ||
+            (I.getOperand(op).isJTI() && InlineJumpTables)) {
           // We found one.  The addressing mode tells us the max displacement
           // from the PC that this instruction permits.
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -23,6 +23,7 @@
 #include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Support/Debug.h"
@@ -1041,6 +1042,15 @@
     Lower0_7 = Lower0_7.addExternalSymbol(ES, TF | ARMII::MO_LO_0_7);
     break;
   }
+  case MachineOperand::MO_JumpTableIndex: {
+    unsigned Idx = MO.getIndex();
+    unsigned TF = MO.getTargetFlags();
+    Upper8_15 = Upper8_15.addJumpTableIndex(Idx, TF | ARMII::MO_HI_8_15);
+    Upper0_7 = Upper0_7.addJumpTableIndex(Idx, TF | ARMII::MO_HI_0_7);
+    Lower8_15 = Lower8_15.addJumpTableIndex(Idx, TF | ARMII::MO_LO_8_15);
+    Lower0_7 = Lower0_7.addJumpTableIndex(Idx, TF | ARMII::MO_LO_0_7);
+    break;
+  }
   default: {
     const GlobalValue *GV = MO.getGlobal();
     unsigned TF = MO.getTargetFlags();
@@ -2764,6 +2774,17 @@
       ExpandTMOV32BitImm(MBB, MBBI);
       return true;

+    case ARM::tLEApcrelJT:
+      // Inline jump tables are handled in ARMAsmPrinter.
+      if (MI.getMF()->getJumpTableInfo()->getEntryKind() ==
+          MachineJumpTableInfo::EK_Inline)
+        return false;
+
+      // Use a 32-bit immediate move to generate the address of the jump table.
+      assert(STI->isThumb() && "Non-inline jump tables expected only in Thumb");
+      ExpandTMOV32BitImm(MBB, MBBI);
+      return true;
+
     case ARM::SUBS_PC_LR: {
       MachineInstrBuilder MIB =
           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri), ARM::PC)
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -3484,6 +3484,11 @@
 }

 unsigned ARMTargetLowering::getJumpTableEncoding() const {
+  // If we don't have a 32-bit pc-relative branch instruction then the jump
+  // table consists of block addresses. Usually this is inline, but for
+  // execute-only it must be placed out-of-line.
+  if (Subtarget->genExecuteOnly() && !Subtarget->hasV8MBaselineOps())
+    return MachineJumpTableInfo::EK_BlockAddress;
   return MachineJumpTableInfo::EK_Inline;
 }
diff --git a/llvm/lib/Target/ARM/ARMParallelDSP.cpp b/llvm/lib/Target/ARM/ARMParallelDSP.cpp
--- a/llvm/lib/Target/ARM/ARMParallelDSP.cpp
+++ b/llvm/lib/Target/ARM/ARMParallelDSP.cpp
@@ -760,12 +760,10 @@
   IRBuilder<NoFolder> IRB(DomLoad->getParent(),
                           ++BasicBlock::iterator(DomLoad));

-  // Bitcast the pointer to a wider type and create the wide load, while making
-  // sure to maintain the original alignment as this prevents ldrd from being
-  // generated when it could be illegal due to memory alignment.
- const unsigned AddrSpace = DomLoad->getPointerAddressSpace(); - Value *VecPtr = IRB.CreateBitCast(Base->getPointerOperand(), - LoadTy->getPointerTo(AddrSpace)); + // Create the wide load, while making sure to maintain the original alignment + // as this prevents ldrd from being generated when it could be illegal due to + // memory alignment. + Value *VecPtr = Base->getPointerOperand(); LoadInst *WideLoad = IRB.CreateAlignedLoad(LoadTy, VecPtr, Base->getAlign()); // Make sure everything is in the correct order in the basic block. diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp @@ -1726,10 +1726,8 @@ // that requires multivalue support in the toolchain, which is currently not // very reliable. We instead throw and catch a pointer to a struct value of // type 'struct __WasmLongjmpArgs', which is defined in Emscripten. - Instruction *CatchCI = + Instruction *LongjmpArgs = IRB.CreateCall(CatchF, {IRB.getInt32(WebAssembly::C_LONGJMP)}, "thrown"); - Value *LongjmpArgs = - IRB.CreateBitCast(CatchCI, LongjmpArgsTy->getPointerTo(), "longjmp.args"); Value *EnvField = IRB.CreateConstGEP2_32(LongjmpArgsTy, LongjmpArgs, 0, 0, "env_gep"); Value *ValField = diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -1706,8 +1706,7 @@ Call.replaceAndErase("virtual-const-prop-1-bit", FnName, RemarksEnabled, OREGetter, IsBitSet); } else { - Value *ValAddr = B.CreateBitCast(Addr, RetType->getPointerTo()); - Value *Val = B.CreateLoad(RetType, ValAddr); + Value *Val = B.CreateLoad(RetType, Addr); NumVirtConstProp++; Call.replaceAndErase("virtual-const-prop", FnName, RemarksEnabled, OREGetter, Val); diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp --- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -2156,9 +2156,8 @@ ShadowSize == 4 ? Type::getInt32Ty(*DFS.Ctx) : Type::getInt64Ty(*DFS.Ctx); IRBuilder<> IRB(Pos); - Value *WideAddr = IRB.CreateBitCast(ShadowAddr, WideShadowTy->getPointerTo()); Value *CombinedWideShadow = - IRB.CreateAlignedLoad(WideShadowTy, WideAddr, ShadowAlign); + IRB.CreateAlignedLoad(WideShadowTy, ShadowAddr, ShadowAlign); unsigned WideShadowBitWidth = WideShadowTy->getIntegerBitWidth(); const uint64_t BytesPerWideShadow = WideShadowBitWidth / DFS.ShadowWidthBits; @@ -2195,10 +2194,10 @@ // shadow). 
   for (uint64_t ByteOfs = BytesPerWideShadow; ByteOfs < Size;
        ByteOfs += BytesPerWideShadow) {
-    WideAddr = IRB.CreateGEP(WideShadowTy, WideAddr,
-                             ConstantInt::get(DFS.IntptrTy, 1));
+    ShadowAddr = IRB.CreateGEP(WideShadowTy, ShadowAddr,
+                               ConstantInt::get(DFS.IntptrTy, 1));
     Value *NextWideShadow =
-        IRB.CreateAlignedLoad(WideShadowTy, WideAddr, ShadowAlign);
+        IRB.CreateAlignedLoad(WideShadowTy, ShadowAddr, ShadowAlign);
     CombinedWideShadow = IRB.CreateOr(CombinedWideShadow, NextWideShadow);
     if (ShouldTrackOrigins) {
       Value *NextOrigin = DFS.loadNextOrigin(Pos, OriginAlign, &OriginAddr);
diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -1099,6 +1099,41 @@
   ReproducerCondStack.pop_back();
 }

+/// Check if the first condition of an AND implies the second.
+static bool checkAndSecondOpImpliedByFirst(
+    FactOrCheck &CB, ConstraintInfo &Info, Module *ReproducerModule,
+    SmallVectorImpl<ReproducerEntry> &ReproducerCondStack,
+    SmallVectorImpl<StackEntry> &DFSInStack) {
+  CmpInst::Predicate Pred;
+  Value *A, *B;
+  Instruction *And = CB.getContextInst();
+  if (!match(And->getOperand(0), m_ICmp(Pred, m_Value(A), m_Value(B))))
+    return false;
+
+  // Optimistically add fact from first condition.
+  unsigned OldSize = DFSInStack.size();
+  Info.addFact(Pred, A, B, CB.NumIn, CB.NumOut, DFSInStack);
+  if (OldSize == DFSInStack.size())
+    return false;
+
+  bool Changed = false;
+  // Check if the second condition can be simplified now.
+  if (auto ImpliedCondition =
+          checkCondition(cast<ICmpInst>(And->getOperand(1)), Info, CB.NumIn,
+                         CB.NumOut, CB.getContextInst())) {
+    And->setOperand(1, ConstantInt::getBool(And->getType(), *ImpliedCondition));
+    Changed = true;
+  }
+
+  // Remove entries again.
+  while (OldSize < DFSInStack.size()) {
+    StackEntry E = DFSInStack.back();
+    removeEntryFromStack(E, Info, ReproducerModule, ReproducerCondStack,
+                         DFSInStack);
+  }
+  return Changed;
+}
+
 void ConstraintInfo::addFact(CmpInst::Predicate Pred, Value *A, Value *B,
                              unsigned NumIn, unsigned NumOut,
                              SmallVectorImpl<StackEntry> &DFSInStack) {
@@ -1300,9 +1335,16 @@
       if (auto *II = dyn_cast<WithOverflowInst>(Inst)) {
         Changed |= tryToSimplifyOverflowMath(II, Info, ToRemove);
       } else if (auto *Cmp = dyn_cast<ICmpInst>(Inst)) {
-        Changed |= checkAndReplaceCondition(
+        bool Simplified = checkAndReplaceCondition(
             Cmp, Info, CB.NumIn, CB.NumOut, CB.getContextInst(),
             ReproducerModule.get(), ReproducerCondStack, S.DT);
+        if (!Simplified && match(CB.getContextInst(),
+                                 m_LogicalAnd(m_Value(), m_Specific(Inst)))) {
+          Simplified =
+              checkAndSecondOpImpliedByFirst(CB, Info, ReproducerModule.get(),
+                                             ReproducerCondStack, DFSInStack);
+        }
+        Changed |= Simplified;
       }
       continue;
     }
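Illustrative only (hypothetical input, not a test from this patch): the new
hook lets the first clause of a logical-and simplify the second, e.g.

    bool in_range(int i) {
      return i > 5 && i > 3; // "i > 5" makes "i > 3" redundant
    }

After checkAndSecondOpImpliedByFirst runs, the second operand is replaced by
the constant true, leaving the equivalent of "return i > 5;". The fact added
for the first clause is popped from DFSInStack again so it cannot leak into
unrelated checks.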
-      VoidPtrTy = Type::getInt8Ty(GV.getContext())->getPointerTo();
+      VoidPtrTy = PointerType::getUnqual(GV.getContext());
       auto &ST = *cast<StructType>(NewMembers.front()->getType());
       Type *Tys[3] = {ST.getElementType(0), ST.getElementType(1), VoidPtrTy};
       EltTy = StructType::get(GV.getContext(), Tys, false);
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -9393,9 +9393,9 @@
     // process to keep correct order.
     auto *VecTy = FixedVectorType::get(E->Scalars.front()->getType(),
                                        E->getVectorFactor());
-    Value *Vec = Builder.CreateAlignedLoad(
-        VecTy, PoisonValue::get(VecTy->getPointerTo()), MaybeAlign());
-    return Vec;
+    return Builder.CreateAlignedLoad(
+        VecTy, PoisonValue::get(PointerType::getUnqual(VecTy->getContext())),
+        MaybeAlign());
   }
   /// Adds 2 input vectors and the mask for their shuffling.
   void add(Value *V1, Value *V2, ArrayRef<int> Mask) {
@@ -10371,20 +10371,17 @@
       LoadInst *LI = cast<LoadInst>(VL0);
       Instruction *NewLI;
-      unsigned AS = LI->getPointerAddressSpace();
       Value *PO = LI->getPointerOperand();
       if (E->State == TreeEntry::Vectorize) {
-        Value *VecPtr = Builder.CreateBitCast(PO, VecTy->getPointerTo(AS));
-        NewLI = Builder.CreateAlignedLoad(VecTy, VecPtr, LI->getAlign());
+        NewLI = Builder.CreateAlignedLoad(VecTy, PO, LI->getAlign());
 
-        // The pointer operand uses an in-tree scalar so we add the new BitCast
-        // or LoadInst to ExternalUses list to make sure that an extract will
+        // The pointer operand uses an in-tree scalar so we add the new
+        // LoadInst to ExternalUses list to make sure that an extract will
         // be generated in the future.
         if (TreeEntry *Entry = getTreeEntry(PO)) {
           // Find which lane we need to extract.
           unsigned FoundLane = Entry->findLaneForValue(PO);
-          ExternalUses.emplace_back(
-              PO, PO != VecPtr ? cast<User>(VecPtr) : NewLI, FoundLane);
+          ExternalUses.emplace_back(PO, NewLI, FoundLane);
         }
       } else {
         assert(E->State == TreeEntry::ScatterVectorize && "Unhandled state");
@@ -14413,11 +14410,12 @@
 /// of the second cmp instruction.
 template <bool IsCompatibility>
 static bool compareCmp(Value *V, Value *V2, TargetLibraryInfo &TLI,
-                       function_ref<bool(Instruction *)> IsDeleted) {
+                       const DominatorTree &DT) {
+  assert(isValidElementType(V->getType()) &&
+         isValidElementType(V2->getType()) &&
+         "Expected valid element types only.");
   auto *CI1 = cast<CmpInst>(V);
   auto *CI2 = cast<CmpInst>(V2);
-  if (IsDeleted(CI2) || !isValidElementType(CI2->getType()))
-    return false;
   if (CI1->getOperand(0)->getType()->getTypeID() <
       CI2->getOperand(0)->getType()->getTypeID())
     return !IsCompatibility;
@@ -14446,12 +14444,27 @@
       return false;
     if (auto *I1 = dyn_cast<Instruction>(Op1))
       if (auto *I2 = dyn_cast<Instruction>(Op2)) {
-        if (I1->getParent() != I2->getParent())
-          return false;
+        if (IsCompatibility) {
+          if (I1->getParent() != I2->getParent())
+            return false;
+        } else {
+          // Try to compare nodes with same parent.
+          DomTreeNodeBase<BasicBlock> *NodeI1 = DT.getNode(I1->getParent());
+          DomTreeNodeBase<BasicBlock> *NodeI2 = DT.getNode(I2->getParent());
+          if (!NodeI1)
+            return NodeI2 != nullptr;
+          if (!NodeI2)
+            return false;
+          assert((NodeI1 == NodeI2) ==
+                     (NodeI1->getDFSNumIn() == NodeI2->getDFSNumIn()) &&
+                 "Different nodes should have different DFS numbers");
+          if (NodeI1 != NodeI2)
+            return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn();
+        }
         InstructionsState S = getSameOpcode({I1, I2}, TLI);
-        if (S.getOpcode())
+        if (S.getOpcode() && (IsCompatibility || !S.isAltShuffle()))
           continue;
-        return false;
+        return !IsCompatibility && I1->getOpcode() < I2->getOpcode();
       }
   }
   return IsCompatibility;
@@ -14478,18 +14491,23 @@
   // Try to vectorize list of compares.
   // Sort by type, compare predicate, etc.
   auto CompareSorter = [&](Value *V, Value *V2) {
-    return compareCmp<false>(V, V2, *TLI,
-                             [&R](Instruction *I) { return R.isDeleted(I); });
+    if (V == V2)
+      return false;
+    return compareCmp<false>(V, V2, *TLI, *DT);
   };
 
   auto AreCompatibleCompares = [&](Value *V1, Value *V2) {
     if (V1 == V2)
       return true;
-    return compareCmp<true>(V1, V2, *TLI,
-                            [&R](Instruction *I) { return R.isDeleted(I); });
+    return compareCmp<true>(V1, V2, *TLI, *DT);
   };
 
-  SmallVector<Value *> Vals(CmpInsts.begin(), CmpInsts.end());
+  SmallVector<Value *> Vals;
+  for (Instruction *V : CmpInsts)
+    if (!R.isDeleted(V) && isValidElementType(V->getType()))
+      Vals.push_back(V);
+  if (Vals.size() <= 1)
+    return Changed;
   Changed |= tryToVectorizeSequence<Value>(
       Vals, CompareSorter, AreCompatibleCompares,
       [this, &R](ArrayRef<Value *> Candidates, bool MaxVFOnly) {
diff --git a/llvm/test/CodeGen/AArch64/aarch64-dup-dot-crash.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-dot-crash.ll
--- a/llvm/test/CodeGen/AArch64/aarch64-dup-dot-crash.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-dup-dot-crash.ll
@@ -5,7 +5,7 @@
 ; generated. Where it tries to generate a ZextOrTrunc node with floating point
 ; type resulting in a crash.
 ; See https://reviews.llvm.org/D128144#4280024 for context
-define void @dot_product(double %a) {
+define double @dot_product(double %a) {
 ; CHECK-LABEL: dot_product:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    fmov d1, #1.00000000
@@ -14,7 +14,9 @@
 ; CHECK-NEXT:    movi d1, #0000000000000000
 ; CHECK-NEXT:    fadd d0, d0, d1
 ; CHECK-NEXT:    fsqrt d0, d0
+; CHECK-NEXT:    fmul d2, d0, d1
 ; CHECK-NEXT:    fcmp d0, #0.0
+; CHECK-NEXT:    fcsel d0, d1, d2, gt
 ; CHECK-NEXT:    ret
 entry:
   %fadd = call double @llvm.vector.reduce.fadd.v3f64(double %a, <3 x double> )
@@ -29,10 +31,10 @@
 
 bb.1:
   %mul.2 = fmul double %shuffle.1, 0.000000e+00
-  br label %exit
+  ret double %mul.2
 
 exit:
-  ret void
+  ret double 0.0
 }
 
 declare double @llvm.sqrt.f64(double)
diff --git a/llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll b/llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
--- a/llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
@@ -645,13 +645,15 @@
 ; CHECK-MACHO:  ldp x20, x19, [sp], #32
 ; CHECK-MACHO:  ret
 
+declare void @use(ptr)
 
-define void @realign_conditional(i1 %b) {
+define void @realign_conditional(i1 %b, ptr %p) {
 entry:
   br i1 %b, label %bb0, label %bb1
 
 bb0:
   %MyAlloca = alloca i8, i64 64, align 32
+  store ptr %MyAlloca, ptr %p
   br label %bb1
 
 bb1:
@@ -665,18 +667,20 @@
 ; CHECK:  tbz {{.*}} .[[LABEL:.*]]
 ; Stack is realigned in a non-entry BB.
; CHECK: sub [[REG:x[01-9]+]], sp, #64 -; CHECK: and sp, [[REG]], #0xffffffffffffffe0 +; CHECK: and [[REG]], [[REG]], #0xffffffffffffffe0 +; CHECK: mov sp, [[REG]] ; CHECK: .[[LABEL]]: ; CHECK: ret -define void @realign_conditional2(i1 %b) { +define void @realign_conditional2(i1 %b, ptr %p) { entry: %tmp = alloca i8, i32 16 br i1 %b, label %bb0, label %bb1 bb0: %MyAlloca = alloca i8, i64 64, align 32 + store ptr %MyAlloca, ptr %p br label %bb1 bb1: @@ -691,7 +695,8 @@ ; CHECK: mov x19, sp ; Stack is realigned in a non-entry BB. ; CHECK: sub [[REG:x[01-9]+]], sp, #64 -; CHECK: and sp, [[REG]], #0xffffffffffffffe0 +; CHECK: and [[REG]], [[REG]], #0xffffffffffffffe0 +; CHECK: mov sp, [[REG]] ; CHECK: .[[LABEL]]: ; CHECK: ret diff --git a/llvm/test/CodeGen/AArch64/arm64-vshift.ll b/llvm/test/CodeGen/AArch64/arm64-vshift.ll --- a/llvm/test/CodeGen/AArch64/arm64-vshift.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vshift.ll @@ -3435,3 +3435,97 @@ %c = ashr <1 x i64> %a, %b ret <1 x i64> %c } + +define void @sqshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) { +; CHECK-LABEL: sqshl_zero_shift_amount: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: addp.2d v0, v0, v1 +; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: ret +entry: + %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) + %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer) + store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8 + ret void +} + +define void @uqshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) { +; CHECK-LABEL: uqshl_zero_shift_amount: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: addp.2d v0, v0, v1 +; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: ret +entry: + %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) + %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer) + store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8 + ret void +} + +define void @srshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) { +; CHECK-LABEL: srshl_zero_shift_amount: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: addp.2d v0, v0, v1 +; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: ret +entry: + %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) + %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer) + store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8 + ret void +} + +define void @urshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) { +; CHECK-LABEL: urshl_zero_shift_amount: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: addp.2d v0, v0, v1 +; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: ret +entry: + %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) + %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer) + store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8 + ret void +} + +define void @sqshlu_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) { +; CHECK-LABEL: sqshlu_zero_shift_amount: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: addp.2d v0, v0, v1 +; CHECK-NEXT: sqshlu.2d v0, v0, #0 +; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: ret +entry: + %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) + %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> 
%vpaddq_v2.i.i, <2 x i64> zeroinitializer) + store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8 + ret void +} + +define void @sshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) { +; CHECK-LABEL: sshl_zero_shift_amount: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: addp.2d v0, v0, v1 +; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: ret +entry: + %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) + %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer) + store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8 + ret void +} + +define void @ushl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) { +; CHECK-LABEL: ushl_zero_shift_amount: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: addp.2d v0, v0, v1 +; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: ret +entry: + %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) + %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer) + store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8 + ret void +} + +declare <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64>, <2 x i64>) diff --git a/llvm/test/CodeGen/ARM/execute-only.ll b/llvm/test/CodeGen/ARM/execute-only.ll --- a/llvm/test/CodeGen/ARM/execute-only.ll +++ b/llvm/test/CodeGen/ARM/execute-only.ll @@ -45,6 +45,27 @@ ; CHECK-NEXT: b.w ; CHECK-NEXT: b.w +; CHECK-T1-LABEL: jump_table: +; CHECK-T1: lsls [[REG_OFFSET:r[0-9]+]], {{r[0-9]+}}, #2 +; CHECK-T1-NEXT: movs [[REG_JT:r[0-9]+]], :upper8_15:.LJTI1_0 +; CHECK-T1-NEXT: lsls [[REG_JT]], [[REG_JT]], #8 +; CHECK-T1-NEXT: adds [[REG_JT]], :upper0_7:.LJTI1_0 +; CHECK-T1-NEXT: lsls [[REG_JT]], [[REG_JT]], #8 +; CHECK-T1-NEXT: adds [[REG_JT]], :lower8_15:.LJTI1_0 +; CHECK-T1-NEXT: lsls [[REG_JT]], [[REG_JT]], #8 +; CHECK-T1-NEXT: adds [[REG_JT]], :lower0_7:.LJTI1_0 +; CHECK-T1-NEXT: ldr [[REG_ENTRY:r[0-9]+]], [[[REG_JT]], [[REG_OFFSET]]] +; CHECK-T1-NEXT: mov pc, [[REG_ENTRY]] +; CHECK-T1: .section .rodata,"a",%progbits +; CHECK-T1-NEXT: .p2align 2, 0x0 +; CHECK-T1-NEXT: .LJTI1_0: +; CHECK-T1-NEXT: .long +; CHECK-T1-NEXT: .long +; CHECK-T1-NEXT: .long +; CHECK-T1-NEXT: .long +; CHECK-T1-NEXT: .long +; CHECK-T1-NEXT: .long + entry: switch i32 %c, label %return [ i32 1, label %sw.bb diff --git a/llvm/test/Transforms/ConstraintElimination/and-implied-by-operands.ll b/llvm/test/Transforms/ConstraintElimination/and-implied-by-operands.ll --- a/llvm/test/Transforms/ConstraintElimination/and-implied-by-operands.ll +++ b/llvm/test/Transforms/ConstraintElimination/and-implied-by-operands.ll @@ -6,7 +6,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C_1:%.*]] = icmp ugt i8 [[X:%.*]], 10 ; CHECK-NEXT: [[T_1:%.*]] = icmp ugt i8 [[X]], 5 -; CHECK-NEXT: [[AND:%.*]] = and i1 [[C_1]], [[T_1]] +; CHECK-NEXT: [[AND:%.*]] = and i1 [[C_1]], true ; CHECK-NEXT: br i1 [[AND]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: ret i1 false @@ -31,7 +31,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C_1:%.*]] = icmp ugt i8 [[X:%.*]], 10 ; CHECK-NEXT: [[T_1:%.*]] = icmp ugt i8 [[X]], 5 -; CHECK-NEXT: [[AND:%.*]] = select i1 [[C_1]], i1 [[T_1]], i1 false +; CHECK-NEXT: [[AND:%.*]] = select i1 [[C_1]], i1 true, i1 false ; CHECK-NEXT: br i1 [[AND]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: ret i1 false @@ -55,7 +55,7 @@ ; CHECK-LABEL: @test_same_cond_for_and( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C_1:%.*]] = icmp ugt i8 [[X:%.*]], 10 -; CHECK-NEXT: [[AND:%.*]] = 
and i1 [[C_1]], [[C_1]] +; CHECK-NEXT: [[AND:%.*]] = and i1 [[C_1]], true ; CHECK-NEXT: br i1 [[AND]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: ret i1 false @@ -78,7 +78,7 @@ ; CHECK-LABEL: @test_same_cond_for_and_select_form( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C_1:%.*]] = icmp ugt i8 [[X:%.*]], 10 -; CHECK-NEXT: [[AND:%.*]] = select i1 [[C_1]], i1 [[C_1]], i1 false +; CHECK-NEXT: [[AND:%.*]] = select i1 [[C_1]], i1 true, i1 false ; CHECK-NEXT: br i1 [[AND]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: ret i1 false @@ -325,7 +325,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C_1:%.*]] = icmp ugt i8 [[X:%.*]], 10 ; CHECK-NEXT: [[T_1:%.*]] = icmp ugt i8 [[X]], 5 -; CHECK-NEXT: [[AND:%.*]] = and i1 [[C_1]], [[T_1]] +; CHECK-NEXT: [[AND:%.*]] = and i1 [[C_1]], true ; CHECK-NEXT: br i1 [[AND]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: ret i1 true diff --git a/llvm/test/Transforms/ConstraintElimination/and.ll b/llvm/test/Transforms/ConstraintElimination/and.ll --- a/llvm/test/Transforms/ConstraintElimination/and.ll +++ b/llvm/test/Transforms/ConstraintElimination/and.ll @@ -459,7 +459,7 @@ ; CHECK-NEXT: [[C_2:%.*]] = icmp ule i4 [[Y]], [[Z:%.*]] ; CHECK-NEXT: [[C_3:%.*]] = icmp ule i4 3, [[X]] ; CHECK-NEXT: [[C_4:%.*]] = icmp ule i4 3, [[A:%.*]] -; CHECK-NEXT: [[AND_1:%.*]] = select i1 [[C_1]], i1 [[C_1]], i1 false +; CHECK-NEXT: [[AND_1:%.*]] = select i1 [[C_1]], i1 true, i1 false ; CHECK-NEXT: [[AND_2:%.*]] = select i1 [[AND_1]], i1 [[C_3]], i1 false ; CHECK-NEXT: [[AND_3:%.*]] = select i1 [[C_4]], i1 [[AND_2]], i1 false ; CHECK-NEXT: br i1 [[AND_3]], label [[BB1:%.*]], label [[EXIT:%.*]] @@ -546,7 +546,7 @@ ; CHECK-NEXT: [[C_2:%.*]] = icmp ule i4 [[Y]], [[Z:%.*]] ; CHECK-NEXT: [[C_3:%.*]] = icmp ule i4 3, [[X]] ; CHECK-NEXT: [[C_4:%.*]] = icmp ule i4 3, [[A:%.*]] -; CHECK-NEXT: [[AND_1:%.*]] = select i1 [[C_1]], i1 [[C_1]], i1 false +; CHECK-NEXT: [[AND_1:%.*]] = select i1 [[C_1]], i1 true, i1 false ; CHECK-NEXT: [[AND_2:%.*]] = select i1 [[AND_1]], i1 [[C_3]], i1 false ; CHECK-NEXT: [[AND_3:%.*]] = select i1 [[C_4]], i1 [[AND_2]], i1 false ; CHECK-NEXT: [[AND_4:%.*]] = select i1 [[AND_3]], i1 true, i1 false diff --git a/llvm/test/Transforms/ConstraintElimination/gep-arithmetic-signed-predicates.ll b/llvm/test/Transforms/ConstraintElimination/gep-arithmetic-signed-predicates.ll --- a/llvm/test/Transforms/ConstraintElimination/gep-arithmetic-signed-predicates.ll +++ b/llvm/test/Transforms/ConstraintElimination/gep-arithmetic-signed-predicates.ll @@ -616,7 +616,7 @@ ; CHECK: step.check: ; CHECK-NEXT: [[STEP_POS:%.*]] = icmp sge i16 [[STEP:%.*]], 0 ; CHECK-NEXT: [[STEP_SLT_N:%.*]] = icmp slt i16 [[STEP]], [[N]] -; CHECK-NEXT: [[AND_STEP:%.*]] = and i1 [[STEP_POS]], [[STEP_SLT_N]] +; CHECK-NEXT: [[AND_STEP:%.*]] = and i1 [[STEP_POS]], false ; CHECK-NEXT: br i1 [[AND_STEP]], label [[PTR_CHECK:%.*]], label [[EXIT:%.*]] ; CHECK: ptr.check: ; CHECK-NEXT: [[SRC_STEP:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i16 [[STEP]] diff --git a/llvm/test/Transforms/ConstraintElimination/geps-precondition-overflow-check.ll b/llvm/test/Transforms/ConstraintElimination/geps-precondition-overflow-check.ll --- a/llvm/test/Transforms/ConstraintElimination/geps-precondition-overflow-check.ll +++ b/llvm/test/Transforms/ConstraintElimination/geps-precondition-overflow-check.ll @@ -36,7 +36,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DST_5:%.*]] = getelementptr i32, ptr [[DST:%.*]], i64 5 ; CHECK-NEXT: [[DST_5_UGE:%.*]] = icmp uge ptr 
[[DST_5]], [[DST]] -; CHECK-NEXT: [[AND:%.*]] = and i1 [[DST_5_UGE]], [[DST_5_UGE]] +; CHECK-NEXT: [[AND:%.*]] = and i1 [[DST_5_UGE]], true ; CHECK-NEXT: br i1 [[AND]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: [[DST_4:%.*]] = getelementptr i32, ptr [[DST]], i64 4 @@ -65,7 +65,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DST_5:%.*]] = getelementptr i32, ptr [[DST:%.*]], i64 5 ; CHECK-NEXT: [[DST_5_UGE:%.*]] = icmp uge ptr [[DST_5]], [[DST]] -; CHECK-NEXT: [[AND:%.*]] = and i1 [[DST_5_UGE]], [[DST_5_UGE]] +; CHECK-NEXT: [[AND:%.*]] = and i1 [[DST_5_UGE]], true ; CHECK-NEXT: br i1 [[AND]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: [[DST_4:%.*]] = getelementptr i32, ptr [[DST]], i64 4 @@ -98,7 +98,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DST_5:%.*]] = getelementptr i32, ptr [[DST:%.*]], i64 5 ; CHECK-NEXT: [[DST_5_UGE:%.*]] = icmp uge ptr [[DST_5]], [[DST]] -; CHECK-NEXT: [[AND:%.*]] = and i1 [[DST_5_UGE]], [[DST_5_UGE]] +; CHECK-NEXT: [[AND:%.*]] = and i1 [[DST_5_UGE]], true ; CHECK-NEXT: br i1 [[AND]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: [[DST_4:%.*]] = getelementptr i32, ptr [[DST]], i64 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cmp-swapped-pred-parent.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cmp-swapped-pred-parent.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cmp-swapped-pred-parent.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cmp-swapped-pred-parent.ll @@ -39,10 +39,10 @@ ; CHECK-NEXT: br label [[BB:%.*]] ; CHECK: bb: ; CHECK-NEXT: [[CALL37:%.*]] = load i16, ptr poison, align 2 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i16> , i16 [[CALL37]], i32 3 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[CALL]], i32 7 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> , <8 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i16> , i16 [[CALL]], i32 3 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[CALL37]], i32 4 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> , <8 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <8 x i16> [[TMP2]], [[TMP3]] ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SimplifyCFG/X86/sink-common-code.ll b/llvm/test/Transforms/SimplifyCFG/X86/sink-common-code.ll --- a/llvm/test/Transforms/SimplifyCFG/X86/sink-common-code.ll +++ b/llvm/test/Transforms/SimplifyCFG/X86/sink-common-code.ll @@ -301,27 +301,27 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[DUMMY:%.*]] = add i32 [[X:%.*]], 5 +; CHECK-NEXT: call void @bar(i32 5) ; CHECK-NEXT: br label [[IF_END:%.*]] ; CHECK: if.else: -; CHECK-NEXT: [[DUMMY1:%.*]] = add i32 [[X]], 6 +; CHECK-NEXT: call void @bar(i32 6) ; CHECK-NEXT: [[GEPB:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[S:%.*]], i32 0, i32 1 ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: ; CHECK-NEXT: [[GEPB_SINK:%.*]] = phi ptr [ [[GEPB]], [[IF_ELSE]] ], [ [[S]], [[IF_THEN]] ] -; CHECK-NEXT: store volatile i32 [[X]], ptr [[GEPB_SINK]], align 4 +; CHECK-NEXT: store volatile i32 [[X:%.*]], ptr [[GEPB_SINK]], align 4 ; CHECK-NEXT: ret i32 1 ; entry: br i1 %flag, label %if.then, label %if.else if.then: - %dummy = add i32 %x, 5 + call void 
@bar(i32 5) store volatile i32 %x, ptr %s br label %if.end if.else: - %dummy1 = add i32 %x, 6 + call void @bar(i32 6) %gepb = getelementptr inbounds %struct.anon, ptr %s, i32 0, i32 1 store volatile i32 %x, ptr %gepb br label %if.end @@ -522,10 +522,10 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[DUMMY:%.*]] = add i32 [[X:%.*]], 1 +; CHECK-NEXT: call void @bar(i32 1) ; CHECK-NEXT: br label [[IF_END:%.*]] ; CHECK: if.else: -; CHECK-NEXT: [[DUMMY2:%.*]] = add i32 [[X]], 4 +; CHECK-NEXT: call void @bar(i32 4) ; CHECK-NEXT: [[GEPB:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[S:%.*]], i32 0, i32 1 ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: @@ -540,14 +540,14 @@ br i1 %flag, label %if.then, label %if.else if.then: - %dummy = add i32 %x, 1 + call void @bar(i32 1) %sv1 = load i32, ptr %s %ext1 = zext i32 %sv1 to i64 %cmp1 = icmp eq i64 %ext1, 56 br label %if.end if.else: - %dummy2 = add i32 %x, 4 + call void @bar(i32 4) %gepb = getelementptr inbounds %struct.anon, ptr %s, i32 0, i32 1 %sv2 = load i32, ptr %gepb %ext2 = zext i32 %sv2 to i64 diff --git a/llvm/test/Transforms/SimplifyCFG/assume.ll b/llvm/test/Transforms/SimplifyCFG/assume.ll --- a/llvm/test/Transforms/SimplifyCFG/assume.ll +++ b/llvm/test/Transforms/SimplifyCFG/assume.ll @@ -130,5 +130,69 @@ ret i32 %phi } +define void @empty_block_with_assume(i1 %c, i32 %x) { +; CHECK-LABEL: @empty_block_with_assume( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[ELSE:%.*]] +; CHECK: if: +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[X:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: br label [[JOIN:%.*]] +; CHECK: else: +; CHECK-NEXT: call void @dummy() +; CHECK-NEXT: br label [[JOIN]] +; CHECK: join: +; CHECK-NEXT: ret void +; +entry: + br i1 %c, label %if, label %else + +if: + %cmp = icmp ne i32 %x, 0 + call void @llvm.assume(i1 %cmp) + br label %join + +else: + call void @dummy() + br label %join + +join: + ret void +} + +define void @not_empty_block_with_assume(i1 %c) { +; CHECK-LABEL: @not_empty_block_with_assume( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[ELSE:%.*]] +; CHECK: if: +; CHECK-NEXT: [[X:%.*]] = call i32 @may_have_side_effect() +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[X]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: br label [[JOIN:%.*]] +; CHECK: else: +; CHECK-NEXT: call void @dummy() +; CHECK-NEXT: br label [[JOIN]] +; CHECK: join: +; CHECK-NEXT: ret void +; +entry: + br i1 %c, label %if, label %else + +if: + %x = call i32 @may_have_side_effect() + %cmp = icmp ne i32 %x, 0 + call void @llvm.assume(i1 %cmp) + br label %join + +else: + call void @dummy() + br label %join + +join: + ret void +} + +declare void @dummy() +declare i32 @may_have_side_effect() declare void @llvm.assume(i1) nounwind diff --git a/llvm/test/Transforms/SimplifyCFG/branch-fold-dbg.ll b/llvm/test/Transforms/SimplifyCFG/branch-fold-dbg.ll --- a/llvm/test/Transforms/SimplifyCFG/branch-fold-dbg.ll +++ b/llvm/test/Transforms/SimplifyCFG/branch-fold-dbg.ll @@ -5,7 +5,7 @@ @0 = external hidden constant [5 x %0], align 4 -define void @foo(i32) nounwind ssp !dbg !0 { +define i1 @foo(i32) nounwind ssp !dbg !0 { ; CHECK-LABEL: @foo( ; CHECK-NEXT: Entry: ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0:%.*]], 0 @@ -19,12 +19,12 @@ ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [5 x %0], ptr @[[GLOB0:[0-9]+]], i32 0, i32 [[TMP0]] ; 
CHECK-NEXT:    [[TMP7:%.*]] = icmp eq ptr [[TMP6]], null
 ; CHECK-NEXT:    [[OR_COND2:%.*]] = select i1 [[TMP5]], i1 true, i1 [[TMP7]]
-; CHECK-NEXT:    br i1 [[OR_COND2]], label [[COMMON_RET]], label [[BB4:%.*]]
-; CHECK:       common.ret:
-; CHECK-NEXT:    ret void
-; CHECK:       BB4:
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp slt i32 [[TMP0]], 0
+; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[OR_COND2]], i1 false, i1 [[TMP8]]
 ; CHECK-NEXT:    br label [[COMMON_RET]]
+; CHECK:       common.ret:
+; CHECK-NEXT:    [[COMMON_RET_OP:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[SPEC_SELECT]], [[BB2]] ]
+; CHECK-NEXT:    ret i1 [[COMMON_RET_OP]]
 ;
 Entry:
   %1 = icmp slt i32 %0, 0, !dbg !5
@@ -49,10 +49,10 @@
 
 BB4:                                              ; preds = %BB3
   %8 = icmp slt i32 %0, 0, !dbg !5
-  ret void, !dbg !14
+  ret i1 %8, !dbg !14
 
 BB5:                                              ; preds = %BB3, %BB2, %BB1, %Entry
-  ret void, !dbg !14
+  ret i1 false, !dbg !14
 }
 
 declare void @llvm.dbg.value(metadata, metadata, metadata) nounwind readnone
diff --git a/llvm/unittests/Support/CommandLineTest.cpp b/llvm/unittests/Support/CommandLineTest.cpp
--- a/llvm/unittests/Support/CommandLineTest.cpp
+++ b/llvm/unittests/Support/CommandLineTest.cpp
@@ -1060,7 +1060,6 @@
   ASSERT_STREQ(Argv[0], "clang");
   ASSERT_STREQ(Argv[1], AFileExp.c_str());
 
-#if !defined(_AIX) && !defined(__MVS__)
   std::string ADirExp = std::string("@") + std::string(ADir.path());
   Argv = {"clang", ADirExp.c_str()};
   Res = cl::ExpandResponseFiles(Saver, cl::TokenizeGNUCommandLine, Argv);
@@ -1068,7 +1067,6 @@
   ASSERT_EQ(2U, Argv.size());
   ASSERT_STREQ(Argv[0], "clang");
   ASSERT_STREQ(Argv[1], ADirExp.c_str());
-#endif
 }
 
 TEST(CommandLineTest, SetDefaultValue) {
diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp
--- a/llvm/utils/TableGen/SubtargetEmitter.cpp
+++ b/llvm/utils/TableGen/SubtargetEmitter.cpp
@@ -41,6 +41,15 @@
 
 namespace {
 
+/// Sorting predicate to sort record pointers by their
+/// FieldName field.
+struct LessRecordFieldFieldName {
+  bool operator()(const Record *Rec1, const Record *Rec2) const {
+    return Rec1->getValueAsString("FieldName") <
+           Rec2->getValueAsString("FieldName");
+  }
+};
+
 class SubtargetEmitter {
   // Each processor has a SchedClassDesc table with an entry for each SchedClass.
   // The SchedClassDesc table indexes into a global write resource table, write
@@ -202,7 +211,7 @@
   std::vector<Record *> FeatureList =
       Records.getAllDerivedDefinitions("SubtargetFeature");
 
-  llvm::sort(FeatureList, LessRecordFieldName());
+  llvm::sort(FeatureList, LessRecordFieldFieldName());
 
   for (const Record *Feature : FeatureList) {
     const StringRef FieldName = Feature->getValueAsString("FieldName");
diff --git a/mlir/include/mlir/IR/BuiltinDialectBytecode.td b/mlir/include/mlir/IR/BuiltinDialectBytecode.td
--- a/mlir/include/mlir/IR/BuiltinDialectBytecode.td
+++ b/mlir/include/mlir/IR/BuiltinDialectBytecode.td
@@ -275,18 +275,17 @@
   Array:$shape,
   Type:$elementType
 )> {
-  let printerPredicate = "!$_val.getNumScalableDims()";
+  let printerPredicate = "!$_val.isScalable()";
 }
 
 def VectorTypeWithScalableDims : DialectType<(type
   Array:$scalableDims,
-  VarInt:$numScalableDims,
   Array:$shape,
   Type:$elementType
 )> {
-  let printerPredicate = "$_val.getNumScalableDims()";
+  let printerPredicate = "$_val.isScalable()";
   // Note: order of serialization does not match order of builder.
-  let cBuilder = "get<$_resultType>(context, shape, elementType, numScalableDims, scalableDims)";
+  let cBuilder = "get<$_resultType>(context, shape, elementType, scalableDims)";
 }
 }
diff --git a/mlir/include/mlir/IR/BuiltinTypes.h b/mlir/include/mlir/IR/BuiltinTypes.h
--- a/mlir/include/mlir/IR/BuiltinTypes.h
+++ b/mlir/include/mlir/IR/BuiltinTypes.h
@@ -306,23 +306,20 @@
   /// Build from another VectorType.
   explicit Builder(VectorType other)
       : shape(other.getShape()), elementType(other.getElementType()),
-        numScalableDims(other.getNumScalableDims()),
         scalableDims(other.getScalableDims()) {}
 
   /// Build from scratch.
   Builder(ArrayRef<int64_t> shape, Type elementType,
           unsigned numScalableDims = 0, ArrayRef<bool> scalableDims = {})
-      : shape(shape), elementType(elementType),
-        numScalableDims(numScalableDims) {
+      : shape(shape), elementType(elementType) {
     if (scalableDims.empty())
       scalableDims = SmallVector<bool>(shape.size(), false);
     else
       this->scalableDims = scalableDims;
   }
 
-  Builder &setShape(ArrayRef<int64_t> newShape, unsigned newNumScalableDims = 0,
+  Builder &setShape(ArrayRef<int64_t> newShape,
                     ArrayRef<bool> newIsScalableDim = {}) {
-    numScalableDims = newNumScalableDims;
     if (newIsScalableDim.empty())
       scalableDims = SmallVector<bool>(shape.size(), false);
     else
@@ -340,8 +337,6 @@
   /// Erase a dim from shape @pos.
   Builder &dropDim(unsigned pos) {
     assert(pos < shape.size() && "overflow");
-    if (pos >= shape.size() - numScalableDims)
-      numScalableDims--;
     if (storage.empty())
       storage.append(shape.begin(), shape.end());
     if (storageScalableDims.empty())
@@ -360,7 +355,7 @@
   operator Type() {
     if (shape.empty())
       return elementType;
-    return VectorType::get(shape, elementType, numScalableDims, scalableDims);
+    return VectorType::get(shape, elementType, scalableDims);
   }
 
 private:
@@ -368,7 +363,6 @@
   // Owning shape data for copy-on-write operations.
   SmallVector<int64_t> storage;
   Type elementType;
-  unsigned numScalableDims;
   ArrayRef<bool> scalableDims;
   // Owning scalableDims data for copy-on-write operations.
   SmallVector<bool> storageScalableDims;
diff --git a/mlir/include/mlir/IR/BuiltinTypes.td b/mlir/include/mlir/IR/BuiltinTypes.td
--- a/mlir/include/mlir/IR/BuiltinTypes.td
+++ b/mlir/include/mlir/IR/BuiltinTypes.td
@@ -1066,13 +1066,11 @@
   let parameters = (ins
     ArrayRefParameter<"int64_t">:$shape,
     "Type":$elementType,
-    "unsigned":$numScalableDims,
     ArrayRefParameter<"bool">:$scalableDims
   );
   let builders = [
     TypeBuilderWithInferredContext<(ins
       "ArrayRef<int64_t>":$shape, "Type":$elementType,
-      CArg<"unsigned", "0">:$numScalableDims,
       CArg<"ArrayRef<bool>", "{}">:$scalableDims
     ), [{
       // While `scalableDims` is optional, its default value should be
@@ -1082,8 +1080,7 @@
         isScalableVec.resize(shape.size(), false);
         scalableDims = isScalableVec;
       }
-      return $_get(elementType.getContext(), shape, elementType,
-                   numScalableDims, scalableDims);
+      return $_get(elementType.getContext(), shape, elementType, scalableDims);
     }]>
   ];
   let extraClassDeclaration = [{
@@ -1100,7 +1097,13 @@
     /// Returns true if the vector contains scalable dimensions.
     bool isScalable() const {
-      return getNumScalableDims() > 0;
+      return llvm::is_contained(getScalableDims(), true);
+    }
+    bool allDimsScalable() const {
+      // Treat 0-d vectors as fixed size.
+      if (getRank() == 0)
+        return false;
+      return !llvm::is_contained(getScalableDims(), false);
     }
 
     /// Get or create a new VectorType with the same shape as `this` and an
diff --git a/mlir/lib/AsmParser/Parser.h b/mlir/lib/AsmParser/Parser.h
--- a/mlir/lib/AsmParser/Parser.h
+++ b/mlir/lib/AsmParser/Parser.h
@@ -211,7 +211,6 @@
   /// Parse a vector type.
  VectorType parseVectorType();
   ParseResult parseVectorDimensionList(SmallVectorImpl<int64_t> &dimensions,
-                                       unsigned &numScalableDims,
                                        SmallVectorImpl<bool> &scalableDims);
   ParseResult parseDimensionListRanked(SmallVectorImpl<int64_t> &dimensions,
                                        bool allowDynamic = true,
diff --git a/mlir/lib/AsmParser/TypeParser.cpp b/mlir/lib/AsmParser/TypeParser.cpp
--- a/mlir/lib/AsmParser/TypeParser.cpp
+++ b/mlir/lib/AsmParser/TypeParser.cpp
@@ -441,8 +441,7 @@
   SmallVector<int64_t, 4> dimensions;
   SmallVector<bool, 4> scalableDims;
-  unsigned numScalableDims;
-  if (parseVectorDimensionList(dimensions, numScalableDims, scalableDims))
+  if (parseVectorDimensionList(dimensions, scalableDims))
     return nullptr;
   if (any_of(dimensions, [](int64_t i) { return i <= 0; }))
     return emitError(getToken().getLoc(),
@@ -459,16 +458,13 @@
     return emitError(typeLoc, "vector elements must be int/index/float type"),
            nullptr;
 
-  return VectorType::get(dimensions, elementType, numScalableDims,
-                         scalableDims);
+  return VectorType::get(dimensions, elementType, scalableDims);
 }
 
 /// Parse a dimension list in a vector type. This populates the dimension list.
 /// For i-th dimension, `scalableDims[i]` contains either:
 ///  * `false` for a non-scalable dimension (e.g. `4`),
 ///  * `true` for a scalable dimension (e.g. `[4]`).
-/// This method also returns the number of scalable dimensions in
-/// `numScalableDims`.
 ///
 /// vector-dim-list := (static-dim-list `x`)?
 /// static-dim-list ::= static-dim (`x` static-dim)*
@@ -476,9 +472,7 @@
 ///
 ParseResult
 Parser::parseVectorDimensionList(SmallVectorImpl<int64_t> &dimensions,
-                                 unsigned &numScalableDims,
                                  SmallVectorImpl<bool> &scalableDims) {
-  numScalableDims = 0;
   // If there is a set of fixed-length dimensions, consume it
   while (getToken().is(Token::integer) || getToken().is(Token::l_square)) {
     int64_t value;
@@ -489,7 +483,6 @@
     if (scalable) {
       if (!consumeIf(Token::r_square))
         return emitWrongTokenError("missing ']' closing scalable dimension");
-      numScalableDims++;
     }
     scalableDims.push_back(scalable);
     // Make sure we have an 'x' or something like 'xbf32'.
diff --git a/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp b/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp
--- a/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp
+++ b/mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp
@@ -463,11 +463,12 @@
     return {};
   if (type.getShape().empty())
     return VectorType::get({1}, elementType);
-  Type vectorType =
-      VectorType::get(type.getShape().back(), elementType,
-                      type.getNumScalableDims(), type.getScalableDims().back());
+  Type vectorType = VectorType::get(type.getShape().back(), elementType,
+                                    type.getScalableDims().back());
   assert(LLVM::isCompatibleVectorType(vectorType) &&
          "expected vector type compatible with the LLVM dialect");
+  assert((type.isScalable() == type.allDimsScalable()) &&
+         "expected scalable vector with all dims scalable");
   auto shape = type.getShape();
   for (int i = shape.size() - 2; i >= 0; --i)
     vectorType = LLVM::LLVMArrayType::get(vectorType, shape[i]);
diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
--- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
+++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
@@ -31,21 +31,15 @@
 
 // Helper to reduce vector type by one rank at front.
 static VectorType reducedVectorTypeFront(VectorType tp) {
   assert((tp.getRank() > 1) && "unlowerable vector type");
-  unsigned numScalableDims = tp.getNumScalableDims();
-  if (tp.getShape().size() == numScalableDims)
-    --numScalableDims;
   return VectorType::get(tp.getShape().drop_front(), tp.getElementType(),
-                         numScalableDims);
+                         tp.getScalableDims().drop_front());
 }
 
 // Helper to reduce vector type by *all* but one rank at back.
 static VectorType reducedVectorTypeBack(VectorType tp) {
   assert((tp.getRank() > 1) && "unlowerable vector type");
-  unsigned numScalableDims = tp.getNumScalableDims();
-  if (numScalableDims > 0)
-    --numScalableDims;
   return VectorType::get(tp.getShape().take_back(), tp.getElementType(),
-                         numScalableDims);
+                         tp.getScalableDims().take_back());
 }
 
 // Helper that picks the proper sequence for inserting.
diff --git a/mlir/lib/Dialect/Arith/IR/ArithOps.cpp b/mlir/lib/Dialect/Arith/IR/ArithOps.cpp
--- a/mlir/lib/Dialect/Arith/IR/ArithOps.cpp
+++ b/mlir/lib/Dialect/Arith/IR/ArithOps.cpp
@@ -123,7 +123,6 @@
     return UnrankedTensorType::get(i1Type);
   if (auto vectorType = llvm::dyn_cast<VectorType>(type))
     return VectorType::get(vectorType.getShape(), i1Type,
-                           vectorType.getNumScalableDims(),
                            vectorType.getScalableDims());
   return i1Type;
 }
diff --git a/mlir/lib/Dialect/ArmSVE/IR/ArmSVEDialect.cpp b/mlir/lib/Dialect/ArmSVE/IR/ArmSVEDialect.cpp
--- a/mlir/lib/Dialect/ArmSVE/IR/ArmSVEDialect.cpp
+++ b/mlir/lib/Dialect/ArmSVE/IR/ArmSVEDialect.cpp
@@ -30,7 +30,6 @@
   auto i1Type = IntegerType::get(type.getContext(), 1);
   if (auto sVectorType = llvm::dyn_cast<VectorType>(type))
     return VectorType::get(sVectorType.getShape(), i1Type,
-                           sVectorType.getNumScalableDims(),
                            sVectorType.getScalableDims());
   return nullptr;
 }
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp
@@ -995,10 +995,7 @@
 
   // LLVM vectors are always 1-D, hence only 1 bool is required to mark it as
   // scalable/non-scalable.
-  SmallVector<bool> scalableDims(1, isScalable);
-
-  return VectorType::get(numElements, elementType,
-                         static_cast<unsigned>(isScalable), scalableDims);
+  return VectorType::get(numElements, elementType, {isScalable});
 }
 
 Type mlir::LLVM::getVectorType(Type elementType,
@@ -1030,7 +1027,10 @@
                              "type");
   if (useLLVM)
     return LLVMScalableVectorType::get(elementType, numElements);
-  return VectorType::get(numElements, elementType, /*numScalableDims=*/1);
+
+  // LLVM vectors are always 1-D, hence only 1 bool is required to mark it as
+  // scalable/non-scalable.
+  return VectorType::get(numElements, elementType, /*scalableDims=*/true);
 }
 
 llvm::TypeSize mlir::LLVM::getPrimitiveTypeSizeInBits(Type type) {
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -223,10 +223,7 @@
   assert(areValidScalableVecDims(scalableDims) &&
          "Permuted scalable vector dimensions are not supported");
 
-  // TODO: Extend scalable vector type to support a bit map.
-  bool numScalableDims = !scalableVecDims.empty() && scalableVecDims.back();
-  return VectorType::get(vectorShape, elementType, numScalableDims,
-                         scalableDims);
+  return VectorType::get(vectorShape, elementType, scalableDims);
 }
 
 /// Masks an operation with the canonical vector mask if the operation needs
@@ -1228,7 +1225,6 @@
     if (firstMaxRankedType) {
       auto vecType = VectorType::get(firstMaxRankedType.getShape(),
                                      getElementTypeOrSelf(vecOperand.getType()),
-                                     firstMaxRankedType.getNumScalableDims(),
                                      firstMaxRankedType.getScalableDims());
       vecOperands.push_back(broadcastIfNeeded(rewriter, vecOperand, vecType));
     } else {
@@ -1241,7 +1237,6 @@
     resultTypes.push_back(
         firstMaxRankedType
             ? VectorType::get(firstMaxRankedType.getShape(), resultType,
-                              firstMaxRankedType.getNumScalableDims(),
                               firstMaxRankedType.getScalableDims())
             : resultType);
   }
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseVectorization.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseVectorization.cpp
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseVectorization.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseVectorization.cpp
@@ -56,9 +56,7 @@
 
 /// Constructs vector type for element type.
 static VectorType vectorType(VL vl, Type etp) {
-  unsigned numScalableDims = vl.enableVLAVectorization;
-  return VectorType::get(vl.vectorLength, etp, numScalableDims,
-                         vl.enableVLAVectorization);
+  return VectorType::get(vl.vectorLength, etp, vl.enableVLAVectorization);
 }
 
 /// Constructs vector type from a memref value.
diff --git a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp
--- a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp
@@ -1176,7 +1176,7 @@
   // Inspect source type. For vector types, apply the same
   // vectorization to the destination type.
   if (auto vtp = dyn_cast<VectorType>(src.getType()))
-    return VectorType::get(vtp.getNumElements(), dtp, vtp.getNumScalableDims());
+    return VectorType::get(vtp.getNumElements(), dtp, vtp.getScalableDims());
   return dtp;
 }
diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
--- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
+++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
@@ -345,9 +345,9 @@
 /// Returns the mask type expected by this operation.
 Type MultiDimReductionOp::getExpectedMaskType() {
   auto vecType = getSourceVectorType();
-  return VectorType::get(
-      vecType.getShape(), IntegerType::get(vecType.getContext(), /*width=*/1),
-      vecType.getNumScalableDims(), vecType.getScalableDims());
+  return VectorType::get(vecType.getShape(),
+                         IntegerType::get(vecType.getContext(), /*width=*/1),
+                         vecType.getScalableDims());
 }
 
 namespace {
@@ -484,9 +484,9 @@
 /// Returns the mask type expected by this operation.
 Type ReductionOp::getExpectedMaskType() {
   auto vecType = getSourceVectorType();
-  return VectorType::get(
-      vecType.getShape(), IntegerType::get(vecType.getContext(), /*width=*/1),
-      vecType.getNumScalableDims(), vecType.getScalableDims());
+  return VectorType::get(vecType.getShape(),
+                         IntegerType::get(vecType.getContext(), /*width=*/1),
+                         vecType.getScalableDims());
 }
 
 Value mlir::vector::getVectorReductionOp(arith::AtomicRMWKind op,
@@ -929,8 +929,7 @@
   assert(!ShapedType::isDynamicShape(maskShape) &&
          "Mask shape couldn't be computed");
   // TODO: Extend the scalable vector type representation with a bit map.
-  assert(lhsType.getNumScalableDims() == 0 &&
-         rhsType.getNumScalableDims() == 0 &&
+  assert(!lhsType.isScalable() && !rhsType.isScalable() &&
          "Scalable vectors are not supported yet");
 
   return VectorType::get(maskShape,
@@ -2792,18 +2791,13 @@
   if (vRHS) {
     SmallVector<bool> scalableDimsRes{vLHS.getScalableDims()[0],
                                       vRHS.getScalableDims()[0]};
-    auto numScalableDims =
-        count_if(scalableDimsRes, [](bool isScalable) { return isScalable; });
     resType = VectorType::get({vLHS.getDimSize(0), vRHS.getDimSize(0)},
-                              vLHS.getElementType(), numScalableDims,
-                              scalableDimsRes);
+                              vLHS.getElementType(), scalableDimsRes);
   } else {
     // Scalar RHS operand
     SmallVector<bool> scalableDimsRes{vLHS.getScalableDims()[0]};
-    auto numScalableDims =
-        count_if(scalableDimsRes, [](bool isScalable) { return isScalable; });
     resType = VectorType::get({vLHS.getDimSize(0)}, vLHS.getElementType(),
-                              numScalableDims, scalableDimsRes);
+                              scalableDimsRes);
   }
 
   if (!result.attributes.get(OuterProductOp::getKindAttrStrName())) {
@@ -2867,9 +2861,9 @@
 /// verification purposes. It requires the operation to be vectorized."
 Type OuterProductOp::getExpectedMaskType() {
   auto vecType = this->getResultVectorType();
-  return VectorType::get(
-      vecType.getShape(), IntegerType::get(vecType.getContext(), /*width=*/1),
-      vecType.getNumScalableDims(), vecType.getScalableDims());
+  return VectorType::get(vecType.getShape(),
+                         IntegerType::get(vecType.getContext(), /*width=*/1),
+                         vecType.getScalableDims());
 }
 
 //===----------------------------------------------------------------------===//
@@ -3528,8 +3522,7 @@
   SmallVector<bool> scalableDims =
       applyPermutationMap(invPermMap, vecType.getScalableDims());
 
-  return VectorType::get(maskShape, i1Type, vecType.getNumScalableDims(),
-                         scalableDims);
+  return VectorType::get(maskShape, i1Type, scalableDims);
 }
 
 ParseResult TransferReadOp::parse(OpAsmParser &parser, OperationState &result) {
@@ -4487,9 +4480,9 @@
 /// verification purposes. It requires the operation to be vectorized."
 Type GatherOp::getExpectedMaskType() {
   auto vecType = this->getIndexVectorType();
-  return VectorType::get(
-      vecType.getShape(), IntegerType::get(vecType.getContext(), /*width=*/1),
-      vecType.getNumScalableDims(), vecType.getScalableDims());
+  return VectorType::get(vecType.getShape(),
+                         IntegerType::get(vecType.getContext(), /*width=*/1),
+                         vecType.getScalableDims());
 }
 
 std::optional<SmallVector<int64_t, 4>> GatherOp::getShapeForUnroll() {
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
@@ -1024,7 +1024,7 @@
   Value mask = rewriter.create<vector::CreateMaskOp>(
       loc,
       VectorType::get(vtp.getShape(), rewriter.getI1Type(),
-                      vtp.getNumScalableDims()),
+                      vtp.getScalableDims()),
       b);
   if (xferOp.getMask()) {
     // Intersect the in-bounds with the mask specified as an op parameter.
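Note: the following example is not part of the patch set. It is a minimal sketch of how call sites migrate to the reworked MLIR VectorType API shown in the diffs above, where the per-dimension `scalableDims` bool array replaces the redundant `numScalableDims` counter; the helper name `makeMixedVector` is illustrative only.

#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/MLIRContext.h"

// Builds vector<2x[4]xf32>. A plain scalable-dim count cannot express *which*
// dimensions are scalable; the per-dim bool array can, which is what makes the
// counter redundant.
static mlir::VectorType makeMixedVector(mlir::MLIRContext *ctx) {
  auto f32 = mlir::Float32Type::get(ctx);
  // Before: VectorType::get({2, 4}, f32, /*numScalableDims=*/1, {false, true});
  // After: the scalableDims array alone identifies the scalable dimensions.
  return mlir::VectorType::get({2, 4}, f32, /*scalableDims=*/{false, true});
}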
diff --git a/mlir/lib/IR/BuiltinTypes.cpp b/mlir/lib/IR/BuiltinTypes.cpp
--- a/mlir/lib/IR/BuiltinTypes.cpp
+++ b/mlir/lib/IR/BuiltinTypes.cpp
@@ -227,7 +227,6 @@
 LogicalResult
 VectorType::verify(function_ref<InFlightDiagnostic()> emitError,
                    ArrayRef<int64_t> shape, Type elementType,
-                   unsigned numScalableDims,
                    ArrayRef<bool> scalableDims) {
   if (!isValidElementType(elementType))
     return emitError()
@@ -239,21 +238,10 @@
            << "vector types must have positive constant sizes but got "
            << shape;
 
-  if (numScalableDims > shape.size())
-    return emitError()
-           << "number of scalable dims cannot exceed the number of dims"
-           << " (" << numScalableDims << " vs " << shape.size() << ")";
-
   if (scalableDims.size() != shape.size())
     return emitError() << "number of dims must match, got "
                        << scalableDims.size() << " and " << shape.size();
 
-  auto numScale =
-      count_if(scalableDims, [](bool isScalable) { return isScalable; });
-  if (numScale != numScalableDims)
-    return emitError() << "number of scalable dims must match, explicit: "
-                       << numScalableDims << ", and bools:" << numScale;
-
   return success();
 }
 
@@ -262,17 +250,17 @@
     return VectorType();
   if (auto et = llvm::dyn_cast<IntegerType>(getElementType()))
     if (auto scaledEt = et.scaleElementBitwidth(scale))
-      return VectorType::get(getShape(), scaledEt, getNumScalableDims());
+      return VectorType::get(getShape(), scaledEt, getScalableDims());
   if (auto et = llvm::dyn_cast<FloatType>(getElementType()))
     if (auto scaledEt = et.scaleElementBitwidth(scale))
-      return VectorType::get(getShape(), scaledEt, getNumScalableDims());
+      return VectorType::get(getShape(), scaledEt, getScalableDims());
   return VectorType();
 }
 
 VectorType VectorType::cloneWith(std::optional<ArrayRef<int64_t>> shape,
                                  Type elementType) const {
   return VectorType::get(shape.value_or(getShape()), elementType,
-                         getNumScalableDims());
+                         getScalableDims());
 }
 
 //===----------------------------------------------------------------------===//
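One more illustrative sketch, not from the patches: the predicate semantics left after `VectorType::verify` drops the `numScalableDims` consistency checks. `isScalable()` is now derived from the bool array, and the new `allDimsScalable()` backs asserts such as the one added in TypeConverter.cpp above. The helper name is hypothetical.

#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/MLIRContext.h"
#include <cassert>

static void checkScalablePredicates(mlir::MLIRContext *ctx) {
  auto f32 = mlir::Float32Type::get(ctx);
  auto fixed = mlir::VectorType::get({4}, f32);                   // vector<4xf32>
  auto scalable = mlir::VectorType::get({4}, f32, {true});        // vector<[4]xf32>
  auto mixed = mlir::VectorType::get({2, 4}, f32, {false, true}); // vector<2x[4]xf32>
  assert(!fixed.isScalable() && !fixed.allDimsScalable());
  assert(scalable.isScalable() && scalable.allDimsScalable());
  // Mixed shapes are scalable, but not *all* dims are scalable.
  assert(mixed.isScalable() && !mixed.allDimsScalable());
}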